"""Manual integration check: upload a PDF and wait for OCR processing.

Logs in to the service at ``base_url``, uploads ``PDF_PATH`` to
``/documents/upload`` and polls ``/documents`` until the number of completed
(or failed) documents grows beyond what was present before the upload.
"""
import os
import sys
import time

import requests

# Service endpoint. Defaults to the original hard-coded address; can be
# overridden through the environment without editing the script.
base_url = os.environ.get('OCR_TEST_BASE_URL', 'http://localhost:3015')

# PDF uploaded by the check (a distinct file is used to avoid duplicates).
PDF_PATH = 'test_meaningful.pdf'

# Number of one-second polls performed while waiting for OCR to finish.
POLL_SECONDS = 60

# Per-request timeouts (seconds) so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 30
UPLOAD_TIMEOUT = 120

_STATUS_KEYS = ('completed', 'processing', 'failed')


def _login():
    """Log in with form data and return the Authorization headers, or None.

    NOTE(security): the default credentials are hard-coded test values kept
    for backward compatibility; prefer supplying OCR_TEST_USERNAME and
    OCR_TEST_PASSWORD through the environment.
    """
    login_data = {
        'username': os.environ.get('OCR_TEST_USERNAME', 'jleu3482'),
        'password': os.environ.get('OCR_TEST_PASSWORD', 'jleu1212'),
    }
    login_response = requests.post(
        f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
    )
    print(f'Login status: {login_response.status_code}')
    if login_response.status_code != 200:
        print(f'Login failed: {login_response.text}')
        return None

    token = login_response.json().get('access_token')
    if not token:
        # Without this guard the script would send "Bearer None".
        print('Login failed: response did not contain an access_token')
        return None

    print('✓ Login successful')
    return {'Authorization': f'Bearer {token}'}


def _fetch_statuses(headers):
    """GET /documents and return ``(status_code, statuses)``.

    ``statuses`` is the 'statuses' mapping of the JSON body, or None when the
    response status is not 200.
    """
    docs_response = requests.get(
        f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
    )
    if docs_response.status_code != 200:
        return docs_response.status_code, None
    return docs_response.status_code, docs_response.json().get('statuses', {})


def _count_by_status(statuses):
    """Return the number of documents listed under each status key."""
    return {key: len(statuses.get(key, [])) for key in _STATUS_KEYS}


def _upload_pdf(pdf_path, headers):
    """Upload *pdf_path* as multipart form data; return True on HTTP 200."""
    print(f'Uploading PDF: {pdf_path}')
    # The file is closed as soon as the upload finishes instead of being held
    # open for the whole polling period.
    with open(pdf_path, 'rb') as f:
        files = {'file': (os.path.basename(pdf_path), f, 'application/pdf')}
        upload_response = requests.post(
            f'{base_url}/documents/upload',
            files=files,
            headers=headers,
            timeout=UPLOAD_TIMEOUT,
        )
    print(f'Upload status: {upload_response.status_code}')
    print(f'Upload response: {upload_response.text}')
    return upload_response.status_code == 200


def _wait_for_ocr(headers, baseline_counts, baseline_failed):
    """Poll /documents once per second; return True on a new completion.

    A document counts as newly completed/failed only when the respective
    count exceeds *baseline_counts*, so documents that already existed before
    the upload do not decide the outcome.
    """
    print("\nChecking document status...")
    print(f"Monitoring OCR processing for {POLL_SECONDS} seconds...")
    for elapsed in range(1, POLL_SECONDS + 1):
        time.sleep(1)
        _, statuses = _fetch_statuses(headers)
        if statuses is None:
            continue

        counts = _count_by_status(statuses)
        processing = counts['processing']
        completed = counts['completed']
        failed = counts['failed']
        print(f'Progress after {elapsed}s: Processing={processing}, Completed={completed}, Failed={failed}')

        if completed > baseline_counts['completed']:
            print("✓ OCR processing completed successfully!")
            return True
        if failed > baseline_counts['failed']:
            print("✗ OCR processing failed!")
            # Show error details, preferring entries that were not already
            # failed before the upload.
            failed_docs = statuses.get("failed", [])
            new_failures = [d for d in failed_docs if d not in baseline_failed]
            for doc in new_failures or failed_docs:
                print(f" Failed document: {doc.get('file_path')} - {doc.get('error_msg', 'Unknown error')}")
            return False

    print(f"⚠ OCR processing timed out after {POLL_SECONDS} seconds")
    return False


def _list_available_pdfs():
    """Print the PDF files in the current directory as a hint to the user."""
    print("Available PDF files:")
    for name in os.listdir('.'):
        if name.lower().endswith('.pdf'):
            print(f" - {name}")


def _run_ocr_upload_check():
    """Run login, baseline capture, upload and polling; return the outcome."""
    headers = _login()
    if headers is None:
        return False

    # Record the pre-upload state so earlier documents are not mistaken for
    # the result of this upload. If the request fails the baseline stays at
    # zero, which matches the behaviour of the original script.
    status_code, statuses = _fetch_statuses(headers)
    print(f'Initial document status: {status_code}')
    baseline_counts = dict.fromkeys(_STATUS_KEYS, 0)
    baseline_failed = []
    if statuses is not None:
        baseline_counts = _count_by_status(statuses)
        baseline_failed = list(statuses.get('failed', []))
        print(f'Initial status - Completed: {baseline_counts["completed"]}, Processing: {baseline_counts["processing"]}, Failed: {baseline_counts["failed"]}')

    if not os.path.exists(PDF_PATH):
        print(f'PDF file not found: {PDF_PATH}')
        _list_available_pdfs()
        return False

    if not _upload_pdf(PDF_PATH, headers):
        print('Upload failed; skipping OCR monitoring')
        return False

    return _wait_for_ocr(headers, baseline_counts, baseline_failed)


def test_ocr_upload():
    """Upload the test PDF and report whether OCR processing succeeded.

    Returns:
        True when a new document reaches the "completed" status within the
        polling window; False on login failure, missing PDF, upload failure,
        OCR failure, timeout, or a network/response error.
    """
    print("Testing OCR PDF upload with GPU-accelerated PaddleOCR...")
    try:
        return _run_ocr_upload_check()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers non-JSON bodies on requests versions whose
        # JSON decode error is not a RequestException.
        print(f'Request error: {exc}')
        return False


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed check.
    sys.exit(0 if test_ocr_upload() else 1)