"""Smoke test: log in, upload ocr.pdf, and poll /documents for the OCR result."""
import os
import sys
import time

import requests
# Root URL of the server under test; request paths such as /login and
# /documents are appended to it.
base_url = 'http://localhost:3015'
# Seconds to wait on a single HTTP call; without a timeout `requests` can
# block indefinitely on a stalled server.
_REQUEST_TIMEOUT_S = 30
# The upload sends the whole PDF, so it gets a longer allowance.
_UPLOAD_TIMEOUT_S = 120
# Number of one-second polls made while waiting for processing to finish.
_POLL_ATTEMPTS = 30


def _status_counts(docs_response) -> tuple:
    """Return (processing, completed, failed) counts from a /documents response.

    Status names missing from the payload count as zero.
    Assumes the JSON body is an object whose 'statuses' entry maps status
    names to lists -- TODO confirm against the server API.
    """
    statuses = docs_response.json().get('statuses', {})
    return tuple(
        len(statuses.get(name, []))
        for name in ('processing', 'completed', 'failed')
    )


def test_ocr_upload() -> bool:
    """Log in, upload ``ocr.pdf`` and poll the server until processing ends.

    Takes no parameters on purpose: the ``test_`` prefix means pytest would
    interpret any parameter as a fixture request.

    Returns:
        True as soon as a poll reports at least one completed document.
        False when login fails, the login response has no access token, the
        PDF is missing, the upload is rejected, a poll reports a failed
        document, or nothing completes within the polling window.

    Raises:
        requests.RequestException: if a request cannot be made or times out.
    """
    print("Testing OCR PDF upload with GPU-accelerated PaddleOCR...")

    # Login with form data.
    # NOTE(review): credentials are hard-coded in the script; consider
    # sourcing them from the environment.
    login_data = {'username': 'jleu3482', 'password': 'jleu1212'}
    login_response = requests.post(
        f'{base_url}/login', data=login_data, timeout=_REQUEST_TIMEOUT_S
    )
    print(f'Login status: {login_response.status_code}')
    if login_response.status_code != 200:
        print(f'Login failed: {login_response.text}')
        return False

    token = login_response.json().get('access_token')
    if not token:
        # Without this guard every later call would send "Bearer None".
        print('Login failed: response did not contain an access_token')
        return False
    headers = {'Authorization': f'Bearer {token}'}
    print('✓ Login successful')

    # Upload OCR PDF.
    pdf_path = 'ocr.pdf'
    if not os.path.exists(pdf_path):
        print(f'PDF file not found: {pdf_path}')
        return False

    print(f'Uploading PDF: {pdf_path}')
    with open(pdf_path, 'rb') as f:
        files = {'file': (os.path.basename(pdf_path), f, 'application/pdf')}
        upload_response = requests.post(
            f'{base_url}/documents/upload',
            files=files,
            headers=headers,
            timeout=_UPLOAD_TIMEOUT_S,
        )
    print(f'Upload status: {upload_response.status_code}')
    print(f'Upload response: {upload_response.text}')
    if upload_response.status_code != 200:
        # Report failure explicitly instead of falling off the end (None).
        print('✗ Upload failed')
        return False

    # Informational snapshot of the document statuses right after the upload.
    print("\nChecking document status...")
    docs_response = requests.get(
        f'{base_url}/documents', headers=headers, timeout=_REQUEST_TIMEOUT_S
    )
    print(f'Documents status: {docs_response.status_code}')
    if docs_response.status_code == 200:
        processing, completed, failed = _status_counts(docs_response)
        print('Current document status:')
        print(f' Completed: {completed}')
        print(f' Processing: {processing}')
        print(f' Failed: {failed}')

    # Poll once per second until a terminal state shows up.
    # NOTE(review): the counts cover every document the server lists, not only
    # this upload, so a previously completed document satisfies the check on
    # the first poll -- confirm whether that is intended.
    print(f"\nMonitoring OCR processing for {_POLL_ATTEMPTS} seconds...")
    for elapsed in range(1, _POLL_ATTEMPTS + 1):
        time.sleep(1)
        docs_response = requests.get(
            f'{base_url}/documents', headers=headers, timeout=_REQUEST_TIMEOUT_S
        )
        if docs_response.status_code != 200:
            # A non-200 poll is skipped; the next poll tries again.
            continue

        processing, completed, failed = _status_counts(docs_response)
        print(
            f'Progress after {elapsed}s: Processing={processing}, '
            f'Completed={completed}, Failed={failed}'
        )
        if completed > 0:
            print("✓ OCR processing completed successfully!")
            return True
        if failed > 0:
            print("✗ OCR processing failed!")
            return False

    print(f"⚠ OCR processing timed out after {_POLL_ATTEMPTS} seconds")
    return False
if __name__ == "__main__":
    # Propagate the outcome as the process exit code (0 = success,
    # 1 = failure) so shells and CI jobs can detect a failed run.
    sys.exit(0 if test_ocr_upload() else 1)