# railseek6/check_ocr_status.py

import requests
import json

# Diagnostic script: log in to the local server, report the failed OCR
# document ('ocr.pdf'), list every processed document, and dump the endpoints
# advertised by the server's OpenAPI schema (highlighting search/query ones).
#
# NOTE(review): the server URL and the credentials below are hard-coded in
# source control; consider loading them from the environment or a config file.
base_url = 'http://localhost:3015'

# Upper bound (seconds) for every HTTP call so the script cannot hang forever
# when the server is unreachable or unresponsive.
REQUEST_TIMEOUT = 30

# Login first
login_data = {'username': 'jleu3482', 'password': 'jleu1212'}
try:
    login_response = requests.post(
        f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
    )
except requests.RequestException as e:
    # Connection refused, timeout, DNS failure, ... - report instead of crashing.
    login_response = None
    print(f'❌ Login request failed: {e}')

if login_response is not None and login_response.status_code != 200:
    # Previously a rejected login ended the script without any output.
    print(f'❌ Login failed: HTTP {login_response.status_code}')
elif login_response is not None:
    try:
        token = login_response.json().get('access_token')
    except ValueError:
        # The body was not valid JSON; continue without a token.
        token = None
    if token:
        headers = {'Authorization': f'Bearer {token}'}
    else:
        # Avoid sending the literal header "Bearer None".
        headers = {}
        print('⚠️ Login response did not contain an access_token')
    print('✅ Login successful')

    # Get the failed document details
    print('🔍 Checking failed OCR document...')
    # Default to an empty mapping so the "processed" section below still works
    # when the document listing could not be fetched (this used to raise
    # NameError because `documents` was only bound on a 200 response).
    documents = {}
    try:
        docs_response = requests.get(
            f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
        )
        if docs_response.status_code == 200:
            documents = docs_response.json()
        else:
            print(f'❌ Could not fetch documents: HTTP {docs_response.status_code}')
    except (requests.RequestException, ValueError) as e:
        print(f'❌ Error fetching documents: {e}')

    # The code reads the listing as {'statuses': {'failed': [...], 'processed': [...]}};
    # `or` guards against keys that are present but null.
    statuses = documents.get('statuses') or {}

    for doc in statuses.get('failed') or []:
        # Only the known problematic upload is of interest here.
        if doc.get('file_path') != 'ocr.pdf':
            continue
        print('❌ Failed OCR document found:')
        print(f'   File: {doc.get("file_path")}')
        print(f'   Error: {doc.get("error_msg")}')
        print(f'   Content Summary: {doc.get("content_summary")}')
        print(f'   Created: {doc.get("created_at")}')

    # Check the successful document
    print('\n✅ Checking successful document...')
    for doc in statuses.get('processed') or []:
        print(f'   File: {doc.get("file_path")}')
        print(f'   Status: {doc.get("status")}')
        print(f'   Content Summary: {doc.get("content_summary")}')
        print(f'   Chunks: {doc.get("chunks_count")}')

    # Check if there's a search endpoint in the OpenAPI docs
    print('\n🔍 Checking OpenAPI documentation for search...')
    try:
        # Requested without the auth header, exactly as before.
        docs = requests.get(f'{base_url}/openapi.json', timeout=REQUEST_TIMEOUT)
        if docs.status_code == 200:
            openapi = docs.json()
            paths = openapi.get('paths', {})
            print('Available endpoints:')
            for path, methods in paths.items():
                # Flag endpoints whose path mentions search/query with a marker.
                lowered = path.lower()
                marker = '🔍 ' if 'search' in lowered or 'query' in lowered else ''
                print(f'  {marker}{path}: {list(methods.keys())}')
        else:
            print('OpenAPI JSON not available')
    except (requests.RequestException, ValueError) as e:
        # Network failure or a body that is not valid JSON.
        print(f'Error checking OpenAPI: {e}')