# railseek6/check_document_status.py

import requests

def check_document_status(*, base_url: str = 'http://localhost:3015',
                          api_key: str = 'jleu1212', session=None) -> None:
    """Report on the first processed document listed by the ``/documents`` endpoint.

    Requests ``{base_url}/documents``, prints the ID, file path, content
    summary, content length and chunk count of the first entry under
    ``statuses -> processed``, then inspects the content summary and reports
    whether it carries classification markers ("Image Classifications:" or
    "bee", matched case-insensitively).

    Every failure mode (connection error, non-200 status, non-JSON body,
    unexpected payload shape, no processed documents) is reported on stdout
    rather than raising or being skipped silently.

    Args:
        base_url: Root URL of the server to query.
        api_key: Value sent in the ``X-API-Key`` header.
        session: Optional object exposing a ``requests``-compatible
            ``get(url, headers=..., timeout=...)`` method. When omitted, the
            ``requests`` module itself is used.
    """
    # NOTE(review): the default API key is a credential committed to source.
    # It is kept so existing no-argument callers keep working; consider
    # supplying it from configuration instead.
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}
    http = requests if session is None else session
    url = f'{base_url}/documents'

    try:
        response = http.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f'❌ Request to {url} failed: {exc}')
        return

    if response.status_code != 200:
        print(f'❌ Unexpected HTTP status {response.status_code} from {url}')
        return

    try:
        data = response.json()
    except ValueError as exc:  # requests' JSON decode errors derive from ValueError
        print(f'❌ Response from {url} is not valid JSON: {exc}')
        return

    # Walk the payload defensively: the shape is dictated by the server.
    statuses = data.get('statuses') if isinstance(data, dict) else None
    processed = statuses.get('processed') if isinstance(statuses, dict) else None
    if not isinstance(processed, list) or not processed:
        print('No processed documents found in the response')
        return

    doc = processed[0]
    if not isinstance(doc, dict):
        print(f'❌ Unexpected document entry in the response: {doc!r}')
        return

    report_fields = (
        ('Document ID', 'id'),
        ('File', 'file_path'),
        ('Content Summary', 'content_summary'),
        ('Content Length', 'content_length'),
        ('Chunks Count', 'chunks_count'),
    )
    for label, key in report_fields:
        print(f'{label}: {doc.get(key, "N/A")}')

    # Only raise the alarm when the summary really lacks both markers.
    summary = str(doc.get('content_summary') or '').lower()
    if 'image classifications:' in summary or 'bee' in summary:
        print('\n✅ The content summary contains classification metadata')
        return

    # No classification markers: the document predates the updated processor
    # and has to be processed again.
    print('\n🚨 ISSUE: The document was processed without classification metadata')
    print('   The content summary does not contain "Image Classifications:" or "bee"')
    print('   This means the document needs to be re-uploaded with the updated processor')

# Run the status check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    check_document_status()