117 lines
5.4 KiB
Python
117 lines
5.4 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
|
|
_BASE_URL = 'http://localhost:3015'
_OCR_FILE = 'ocr.pdf'
_REQUEST_TIMEOUT = 30   # seconds; keeps a stalled server from hanging the script
_UPLOAD_TIMEOUT = 120   # seconds; uploads get a longer allowance than status calls
_POLL_INTERVAL = 10     # seconds between status checks
_MAX_POLLS = 30         # 30 checks x 10 s = about 5 minutes of monitoring


def _split_statuses(documents):
    """Return ``(processed, failed)`` lists from a documents payload.

    Returns ``None`` when the payload is not a dict with a ``'statuses'`` key.
    """
    if not isinstance(documents, dict) or 'statuses' not in documents:
        return None
    statuses = documents['statuses']
    return statuses.get('processed', []), statuses.get('failed', [])


def _find_document(docs, file_path):
    """Return the first entry of *docs* whose ``file_path`` matches, else ``None``."""
    return next((doc for doc in docs if doc.get('file_path') == file_path), None)


def _wait_for_processing(session, docs_url):
    """Poll *docs_url* until the OCR file shows up as processed or failed.

    Returns ``True`` when the file is listed as processed, ``False`` when it is
    listed as failed or when the polling budget is exhausted.
    """
    print('\nMonitoring processing status...')
    for attempt in range(1, _MAX_POLLS + 1):
        time.sleep(_POLL_INTERVAL)
        docs_response = session.get(docs_url, timeout=_REQUEST_TIMEOUT)
        if docs_response.status_code != 200:
            # Report the failed check instead of skipping it silently, then
            # keep polling: a transient error should not abort the wait.
            print(f'Status check {attempt}: documents request returned '
                  f'{docs_response.status_code}')
            continue

        documents = docs_response.json()
        split = _split_statuses(documents)
        if split is None:
            print(f'Unexpected status format: {type(documents)}')
            continue

        processed_docs, failed_docs = split
        if _find_document(processed_docs, _OCR_FILE) is not None:
            print('OCR PDF processing completed successfully!')
            return True

        failed = _find_document(failed_docs, _OCR_FILE)
        if failed is not None:
            print(f'OCR PDF processing failed: {failed.get("error_msg", "Unknown error")}')
            return False

        print(f'Status check {attempt}: OCR PDF still processing...')

    print('Timeout waiting for processing to complete')
    return False


def test_ocr_upload_and_status():
    """Log in, make sure ``ocr.pdf`` is uploaded, and wait for it to be processed.

    The function talks to the server at ``http://localhost:3015``:

    1. POST ``/login`` with the test credentials.
    2. GET ``/documents`` and print the processed / failed entries.
    3. If ``ocr.pdf`` is not among the processed entries, upload it from the
       current working directory via ``/documents/upload``.
    4. Poll ``/documents`` every 10 seconds (up to 30 times) until the file is
       reported as processed or failed.

    Returns:
        ``True`` if ``ocr.pdf`` is (or becomes) processed; ``False`` on any
        HTTP failure, processing failure, timeout, or unexpected exception.
        Exceptions are reported on stdout and never propagate to the caller.
    """
    login_url = f'{_BASE_URL}/login'
    docs_url = f'{_BASE_URL}/documents'
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    login_data = {'username': 'jleu3482', 'password': 'jleu1212'}

    print("Testing LightRAG OCR upload and status...")

    try:
        # One session for the whole run: cookies set by /login are sent on the
        # following requests and the connection pool is closed on exit.
        # NOTE(review): if the server returns a token in the login body rather
        # than a cookie, it is not forwarded here - confirm against the API.
        with requests.Session() as session:
            response = session.post(login_url, data=login_data,
                                    timeout=_REQUEST_TIMEOUT)
            print(f'Login response: {response.status_code}')
            if response.status_code != 200:
                print(f'Login failed: {response.text}')
                return False
            print('Login successful')

            # Get documents list to check current status
            docs_response = session.get(docs_url, timeout=_REQUEST_TIMEOUT)
            print(f'Documents response: {docs_response.status_code}')
            if docs_response.status_code != 200:
                print(f'Failed to get documents: {docs_response.text}')
                return False

            documents = docs_response.json()
            print(f'Documents response: {documents}')

            # Handle the new API response format with statuses
            split = _split_statuses(documents)
            if split is None:
                print(f'Unexpected documents format: {type(documents)}')
                processed_docs, failed_docs = [], []
            else:
                processed_docs, failed_docs = split
                print(f'Processed documents: {len(processed_docs)}')
                print(f'Failed documents: {len(failed_docs)}')
                for doc in processed_docs:
                    print(f' - {doc.get("file_path", "Unknown")}: {doc.get("status", "Unknown")}')
                for doc in failed_docs:
                    print(f' - {doc.get("file_path", "Unknown")}: {doc.get("status", "Unknown")} - {doc.get("error_msg", "No error message")}')

            # Check if ocr.pdf is already uploaded and processed
            ocr_doc = _find_document(processed_docs, _OCR_FILE)
            if ocr_doc is not None:
                print(f'\nOCR PDF found with status: {ocr_doc.get("status")}')
                if ocr_doc.get('status') == 'processed':
                    print('OCR PDF already processed successfully!')
                    return True
                print('OCR PDF exists but not processed, monitoring status...')
            else:
                print('\nOCR PDF not found, uploading...')
                # The request must be sent while the file handle is still open.
                with open(_OCR_FILE, 'rb') as f:
                    files = {'file': (_OCR_FILE, f, 'application/pdf')}
                    upload_response = session.post(
                        f'{_BASE_URL}/documents/upload',
                        files=files,
                        timeout=_UPLOAD_TIMEOUT,
                    )
                print(f'Upload response: {upload_response.status_code}')
                if upload_response.status_code != 200:
                    print(f'Upload failed: {upload_response.text}')
                    return False
                print('OCR PDF uploaded successfully!')

            return _wait_for_processing(session, docs_url)

    except Exception as e:
        # Top-level boundary of the script: any network, file, or parsing
        # error is reported and converted into a failed result.
        print(f'Error during test: {e}')
        return False
|
|
|
|
if __name__ == '__main__':
    # Run the check as a script and report the outcome on stdout.
    success = test_ocr_upload_and_status()
    if success:
        print('\n✅ OCR upload and processing test PASSED!')
    else:
        print('\n❌ OCR upload and processing test FAILED!')
    # Mirror the result in the process exit status so shells and CI jobs can
    # tell a failed run from a successful one (0 = passed, 1 = failed).
    raise SystemExit(0 if success else 1)