import requests
import json
import time

# Defaults for the local server this smoke test talks to.
# NOTE(review): the credentials are hard-coded in source; pass them in via the
# keyword arguments of test_ocr_upload_and_status() (or load them from a secret
# store) for anything other than local development.
_BASE_URL = 'http://localhost:3015'
_USERNAME = 'jleu3482'
_PASSWORD = 'jleu1212'

# Local file that is uploaded, and the ``file_path`` value looked for in the
# server's document listing.
_OCR_FILE = 'ocr.pdf'


def _bearer_token(login_response):
    """Return the ``access_token`` string from a login response, or None.

    NOTE(review): assumes the login endpoint may answer with an OAuth2-style
    JSON body such as ``{"access_token": "..."}`` -- confirm against the
    server. A non-JSON body or a body without that key yields None.
    """
    try:
        payload = login_response.json()
    except ValueError:
        return None
    if isinstance(payload, dict):
        token = payload.get('access_token')
        if isinstance(token, str) and token:
            return token
    return None


def _status_buckets(payload):
    """Return the ``statuses`` mapping of a documents payload, or None.

    None is returned when the payload is not a dict or its ``statuses`` entry
    is not a dict, i.e. when the response does not have the expected format.
    """
    if isinstance(payload, dict):
        buckets = payload.get('statuses')
        if isinstance(buckets, dict):
            return buckets
    return None


def _docs_in(buckets, name):
    """Return the dict entries stored under bucket *name* (missing/null -> [])."""
    return [doc for doc in (buckets.get(name) or []) if isinstance(doc, dict)]


def _find_document(buckets, file_name):
    """Locate *file_name* in any status bucket.

    Returns ``(bucket_name, doc)`` for the first entry whose ``file_path``
    equals *file_name*, or ``(None, None)`` when there is no such entry. The
    ``processed`` bucket is searched first so a finished document wins over a
    stale entry in another bucket.
    """
    # sorted() is stable and False < True, so 'processed' moves to the front
    # while the remaining buckets keep their original order.
    for bucket in sorted(buckets, key=lambda name: name != 'processed'):
        for doc in _docs_in(buckets, bucket):
            if doc.get('file_path') == file_name:
                return bucket, doc
    return None, None


def _wait_for_processing(session, docs_url, poll_interval, max_checks, timeout):
    """Poll the documents endpoint until the OCR PDF is processed or failed.

    Sleeps *poll_interval* seconds before each of at most *max_checks* polls.
    Returns True when the file appears in the ``processed`` bucket, False when
    it appears in the ``failed`` bucket or when all polls are used up.
    """
    print('\nMonitoring processing status...')
    for attempt in range(1, max_checks + 1):
        time.sleep(poll_interval)

        docs_response = session.get(docs_url, timeout=timeout)
        if docs_response.status_code != 200:
            # Keep polling, but do not hide the failed request.
            print(f'Status check {attempt}: documents request returned '
                  f'{docs_response.status_code}')
            continue

        documents = docs_response.json()
        buckets = _status_buckets(documents)
        if buckets is None:
            print(f'Unexpected status format: {type(documents)}')
            continue

        for doc in _docs_in(buckets, 'processed'):
            if doc.get('file_path') == _OCR_FILE:
                print('OCR PDF processing completed successfully!')
                return True

        for doc in _docs_in(buckets, 'failed'):
            if doc.get('file_path') == _OCR_FILE:
                print(f'OCR PDF processing failed: {doc.get("error_msg", "Unknown error")}')
                return False

        print(f'Status check {attempt}: OCR PDF still processing...')

    print('Timeout waiting for processing to complete')
    return False


def test_ocr_upload_and_status(
    *,
    base_url: str = _BASE_URL,
    username: str = _USERNAME,
    password: str = _PASSWORD,
    poll_interval: float = 10,
    max_checks: int = 30,
    request_timeout: float = 60,
) -> bool:
    """Log in, make sure ``ocr.pdf`` is uploaded, and wait for it to be processed.

    Steps:
      1. POST the credentials to ``<base_url>/login``.
      2. GET ``<base_url>/documents`` and print the processed/failed entries.
      3. If ``ocr.pdf`` is already listed as processed, succeed immediately.
         If it is listed in another non-failed bucket, skip the upload.
         Otherwise upload the local ``ocr.pdf`` to ``<base_url>/documents/upload``.
      4. Poll the documents endpoint until the file shows up as processed or
         failed, or until ``max_checks`` polls have been made.

    Args:
        base_url: Root URL of the server (a trailing slash is ignored).
        username: Login user name.
        password: Login password.
        poll_interval: Seconds to sleep before each status poll.
        max_checks: Maximum number of status polls (30 x 10 s = 5 minutes
            with the defaults).
        request_timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        True if the document is (or becomes) processed; False on any failed
        step, on timeout, or when an exception is raised along the way. All
        exceptions are caught and reported via ``print``.
    """
    root = base_url.rstrip('/')
    login_url = f'{root}/login'
    docs_url = f'{root}/documents'
    upload_url = f'{root}/documents/upload'
    login_data = {'username': username, 'password': password}

    print("Testing LightRAG OCR upload and status...")

    try:
        # One session for the whole run so cookies set by the login are sent
        # with the follow-up requests.
        with requests.Session() as session:
            response = session.post(login_url, data=login_data, timeout=request_timeout)
            print(f'Login response: {response.status_code}')
            if response.status_code != 200:
                print(f'Login failed: {response.text}')
                return False
            print('Login successful')

            # Forward a token-style login result as well, when there is one.
            token = _bearer_token(response)
            if token:
                session.headers['Authorization'] = f'Bearer {token}'

            # Get documents list to check current status
            docs_response = session.get(docs_url, timeout=request_timeout)
            print(f'Documents response: {docs_response.status_code}')
            if docs_response.status_code != 200:
                print(f'Failed to get documents: {docs_response.text}')
                return False

            documents = docs_response.json()
            print(f'Documents response: {documents}')

            buckets = _status_buckets(documents)
            if buckets is None:
                print(f'Unexpected documents format: {type(documents)}')
                buckets = {}
            else:
                processed_docs = _docs_in(buckets, 'processed')
                failed_docs = _docs_in(buckets, 'failed')

                print(f'Processed documents: {len(processed_docs)}')
                print(f'Failed documents: {len(failed_docs)}')

                for doc in processed_docs:
                    print(f'  - {doc.get("file_path", "Unknown")}: {doc.get("status", "Unknown")}')

                for doc in failed_docs:
                    print(f'  - {doc.get("file_path", "Unknown")}: {doc.get("status", "Unknown")} - {doc.get("error_msg", "No error message")}')

            # Look in every bucket, not only 'processed', so a document that
            # is still in flight is monitored instead of uploaded again.
            bucket, ocr_doc = _find_document(buckets, _OCR_FILE)

            if ocr_doc is not None and bucket != 'failed':
                print(f'\nOCR PDF found with status: {ocr_doc.get("status")}')
                # Bucket membership decides, matching the polling loop.
                if bucket == 'processed' or ocr_doc.get('status') == 'processed':
                    print('OCR PDF already processed successfully!')
                    return True
                print('OCR PDF exists but not processed, monitoring status...')
            else:
                if ocr_doc is not None:
                    # A failed entry is re-uploaded, as the pre-check never
                    # treated failed documents as "already present".
                    print(f'\nOCR PDF previously failed '
                          f'({ocr_doc.get("error_msg", "No error message")}), uploading again...')
                else:
                    print('\nOCR PDF not found, uploading...')

                # Upload ocr.pdf
                with open(_OCR_FILE, 'rb') as f:
                    files = {'file': (_OCR_FILE, f, 'application/pdf')}
                    upload_response = session.post(
                        upload_url, files=files, timeout=request_timeout
                    )

                print(f'Upload response: {upload_response.status_code}')
                if upload_response.status_code != 200:
                    print(f'Upload failed: {upload_response.text}')
                    return False
                print('OCR PDF uploaded successfully!')

            return _wait_for_processing(
                session, docs_url, poll_interval, max_checks, request_timeout
            )

    except Exception as e:
        # Top-level boundary of the smoke test: report and signal failure.
        print(f'Error during test: {e}')
        return False


if __name__ == '__main__':
    success = test_ocr_upload_and_status()
    if success:
        print('\n✅ OCR upload and processing test PASSED!')
    else:
        print('\n❌ OCR upload and processing test FAILED!')
    # Propagate the outcome so shells and CI jobs can detect a failed run.
    raise SystemExit(0 if success else 1)