# Files
# railseek6/test_ocr_upload_fixed.py
#
# 72 lines
# 3.5 KiB
# Python

import requests
import os
import time
# Root URL of the server this script talks to; endpoint paths are appended to it.
base_url = "http://localhost:3015"
def test_ocr_upload():
    """Log in, upload ``ocr.pdf`` and poll the server until OCR finishes.

    The function authenticates against ``{base_url}/login``, posts the PDF to
    ``{base_url}/documents/upload`` and then polls ``{base_url}/documents``
    once per second (30 polls), reading the ``statuses`` buckets of the reply.

    Returns:
        bool: True if a poll reports at least one completed document. False
        on login failure, a login reply without ``access_token``, a missing
        PDF file, a non-200 upload, a reported failed document, or when
        neither outcome shows up within the polling window.

    Raises:
        requests.RequestException: if the server cannot be reached or a
            request exceeds its timeout. Deliberately not caught so that a
            broken environment surfaces as an error rather than as ``False``.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    request_timeout = 10   # seconds, for login and status calls
    upload_timeout = 120   # seconds, generous because the request carries the whole file
    poll_attempts = 30

    print("Testing OCR PDF upload with GPU-accelerated PaddleOCR...")

    # Login with form data.
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    login_data = {'username': 'jleu3482', 'password': 'jleu1212'}
    login_response = requests.post(
        f'{base_url}/login', data=login_data, timeout=request_timeout
    )
    print(f'Login status: {login_response.status_code}')
    if login_response.status_code != 200:
        print(f'Login failed: {login_response.text}')
        return False

    token = login_response.json().get('access_token')
    if not token:
        # Otherwise every later call would be sent with "Bearer None".
        print('Login failed: response did not contain an access_token')
        return False
    headers = {'Authorization': f'Bearer {token}'}
    print('✓ Login successful')

    # Upload OCR PDF
    pdf_path = 'ocr.pdf'
    if not os.path.exists(pdf_path):
        print(f'PDF file not found: {pdf_path}')
        return False

    print(f'Uploading PDF: {pdf_path}')
    with open(pdf_path, 'rb') as f:
        files = {'file': (os.path.basename(pdf_path), f, 'application/pdf')}
        upload_response = requests.post(
            f'{base_url}/documents/upload',
            files=files,
            headers=headers,
            timeout=upload_timeout,
        )
    print(f'Upload status: {upload_response.status_code}')
    print(f'Upload response: {upload_response.text}')
    if upload_response.status_code != 200:
        # Nothing was accepted by the server, so there is nothing to monitor.
        print('✗ Upload failed; skipping OCR monitoring')
        return False

    # Check document status immediately after upload
    print("\nChecking document status...")
    docs_response = requests.get(
        f'{base_url}/documents', headers=headers, timeout=request_timeout
    )
    print(f'Documents status: {docs_response.status_code}')
    if docs_response.status_code == 200:
        processing, completed, failed = _count_statuses(docs_response.json())
        print('Current document status:')
        print(f' Completed: {completed}')
        print(f' Processing: {processing}')
        print(f' Failed: {failed}')

    # Monitor progress for 30 seconds.
    # NOTE(review): the counts cover every document the endpoint lists, not
    # only the file uploaded above, so a previously completed (or failed)
    # document ends the loop on the first poll. Confirm whether the upload
    # response exposes an identifier that could be tracked instead.
    print("\nMonitoring OCR processing for 30 seconds...")
    for attempt in range(1, poll_attempts + 1):
        time.sleep(1)
        docs_response = requests.get(
            f'{base_url}/documents', headers=headers, timeout=request_timeout
        )
        if docs_response.status_code != 200:
            # Report the failed poll instead of skipping it silently.
            print(
                f'Progress after {attempt}s: documents request returned '
                f'{docs_response.status_code}'
            )
            continue
        processing, completed, failed = _count_statuses(docs_response.json())
        print(
            f'Progress after {attempt}s: Processing={processing}, '
            f'Completed={completed}, Failed={failed}'
        )
        if completed > 0:
            print("✓ OCR processing completed successfully!")
            return True
        if failed > 0:
            print("✗ OCR processing failed!")
            return False

    print("⚠ OCR processing timed out after 30 seconds")
    return False


def _count_statuses(payload):
    """Return ``(processing, completed, failed)`` counts from a documents payload.

    ``payload`` is the decoded JSON body of ``GET /documents``. Each count is
    the length of the matching entry under its ``statuses`` mapping; a missing
    mapping or entry counts as zero.
    """
    statuses = payload.get('statuses', {})
    return tuple(
        len(statuses.get(key, []))
        for key in ('processing', 'completed', 'failed')
    )
if __name__ == "__main__":
    # Turn the boolean result into the process exit status (0 = success,
    # 1 = failure) so shells and CI jobs can detect a failed run.
    raise SystemExit(0 if test_ocr_upload() else 1)