# File: railseek6/check_document_status.py (25 lines, 1.1 KiB, Python)

import requests
def check_document_status():
    """Print the first processed document and check it for classification data.

    Sends ``GET /documents`` to the server at ``http://localhost:3015`` and,
    on an HTTP 200 response, prints the ID, file path, content summary,
    content length and chunk count of the first entry under
    ``statuses -> processed`` in the JSON body. It then inspects the content
    summary for the ``"Image Classifications:"`` marker or the word ``"bee"``
    and reports whether the document has to be re-uploaded.

    A non-200 response or an empty/missing ``processed`` list is reported on
    stdout instead of being ignored or raising.

    Returns:
        None. All results are written to stdout.

    Raises:
        requests.RequestException: If the HTTP request itself fails
            (connection refused, timeout, ...); this is not caught here.
    """
    base_url = 'http://localhost:3015'
    # NOTE(review): the API key is hard-coded in the script; confirm this is
    # acceptable before sharing or committing it.
    headers = {'X-API-Key': 'jleu1212', 'Content-Type': 'application/json'}

    # Get the document details
    response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
    if response.status_code != 200:
        print(f'Request to {base_url}/documents failed with HTTP {response.status_code}')
        return

    data = response.json()
    # Tolerate a missing or empty "processed" bucket instead of raising
    # KeyError / IndexError when nothing has been processed yet.
    processed = (data.get('statuses') or {}).get('processed') or []
    if not processed:
        print('No processed documents found')
        return

    doc = processed[0]
    summary = doc["content_summary"]
    print(f'Document ID: {doc["id"]}')
    print(f'File: {doc["file_path"]}')
    print(f'Content Summary: {summary}')
    print(f'Content Length: {doc["content_length"]}')
    print(f'Chunks Count: {doc["chunks_count"]}')

    # Only report the problem when the summary really lacks the markers;
    # presumably the summary is a string (or None) - verify against the API.
    summary_text = summary or ''
    has_classification = (
        'Image Classifications:' in summary_text or 'bee' in summary_text
    )
    if has_classification:
        print('\n✅ OK: The content summary contains classification metadata')
        return

    # The content summary shows the document was processed without
    # classification metadata, so it needs to be re-processed with the
    # updated processor.
    print('\n🚨 ISSUE: The document was processed without classification metadata')
    print(' The content summary does not contain "Image Classifications:" or "bee"')
    print(' This means the document needs to be re-uploaded with the updated processor')
# Script entry point: run the status check only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    check_document_status()