Files
railseek6/check_ocr_status.py

56 lines
2.3 KiB
Python

import requests
import json
# Diagnostic script: log in to the local server, report the document whose OCR
# failed and the documents that were processed, then list the endpoints that
# the server advertises in its OpenAPI schema.
base_url = 'http://localhost:3015'

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or an untracked config file instead.
login_data = {'username': 'jleu3482', 'password': 'jleu1212'}

# Seconds to wait for the server. Without a timeout `requests` blocks
# indefinitely when the server accepts the connection but never answers.
REQUEST_TIMEOUT = 10

# File path of the document whose OCR failure is being investigated.
FAILED_OCR_FILE = 'ocr.pdf'


def docs_with_status(documents, status):
    """Return the list stored under ``documents['statuses'][status]``.

    A missing or null ``statuses`` mapping, or a missing or null entry for
    *status*, yields an empty list so callers can iterate unconditionally.
    """
    statuses = documents.get('statuses') or {}
    return statuses.get(status) or []


def format_endpoint(path, methods):
    """Return the display line for one OpenAPI path.

    Paths containing ``search`` or ``query`` (case-insensitive) are
    highlighted with a magnifying-glass marker.
    """
    lowered = path.lower()
    marker = '🔍 ' if 'search' in lowered or 'query' in lowered else ''
    return f' {marker}{path}: {list(methods.keys())}'


def login():
    """Log in and return the ``Authorization`` headers, or ``None`` on failure.

    Every failure mode (network error, non-200 status, unparsable body,
    missing token) is reported on stdout instead of being silently ignored.
    """
    try:
        login_response = requests.post(
            f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as e:
        print(f'❌ Login request failed: {e}')
        return None

    if login_response.status_code != 200:
        print(f'❌ Login failed: HTTP {login_response.status_code}')
        return None

    try:
        token = login_response.json().get('access_token')
    except ValueError as e:
        # The body was not valid JSON.
        print(f'❌ Login response was not valid JSON: {e}')
        return None

    if not token:
        # Avoid sending the literal header "Bearer None".
        print('❌ Login response did not contain an access token')
        return None

    print('✅ Login successful')
    return {'Authorization': f'Bearer {token}'}


def report_documents(headers):
    """Print details of the failed OCR document and of all processed ones."""
    print('🔍 Checking failed OCR document...')
    try:
        docs_response = requests.get(
            f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as e:
        print(f'❌ Document request failed: {e}')
        return

    if docs_response.status_code != 200:
        print(f'❌ Could not list documents: HTTP {docs_response.status_code}')
        return

    try:
        documents = docs_response.json()
    except ValueError as e:
        print(f'❌ Document response was not valid JSON: {e}')
        return

    for doc in docs_with_status(documents, 'failed'):
        if doc.get('file_path') == FAILED_OCR_FILE:
            print('❌ Failed OCR document found:')
            print(f' File: {doc.get("file_path")}')
            print(f' Error: {doc.get("error_msg")}')
            print(f' Content Summary: {doc.get("content_summary")}')
            print(f' Created: {doc.get("created_at")}')

    # Check the successful document
    print('\n✅ Checking successful document...')
    for doc in docs_with_status(documents, 'processed'):
        print(f' File: {doc.get("file_path")}')
        print(f' Status: {doc.get("status")}')
        print(f' Content Summary: {doc.get("content_summary")}')
        print(f' Chunks: {doc.get("chunks_count")}')


def report_openapi():
    """List the endpoints from ``/openapi.json``, highlighting search/query."""
    print('\n🔍 Checking OpenAPI documentation for search...')
    try:
        docs = requests.get(f'{base_url}/openapi.json', timeout=REQUEST_TIMEOUT)
        if docs.status_code == 200:
            paths = docs.json().get('paths', {})
            print('Available endpoints:')
            for path, methods in paths.items():
                print(format_endpoint(path, methods))
        else:
            print('OpenAPI JSON not available')
    except Exception as e:
        # Best-effort diagnostic: report any failure and carry on.
        print(f'Error checking OpenAPI: {e}')


def main():
    """Run the document checks (when login succeeds) and the OpenAPI check."""
    headers = login()
    if headers is not None:
        report_documents(headers)
    # The OpenAPI request sends no auth headers, so it does not depend on
    # the login having succeeded.
    report_openapi()


if __name__ == '__main__':
    main()