# File viewer metadata: 129 lines, 4.9 KiB, Python
import json
import os
import sys
import time

import requests
|
|
|
|
def _wait_for_ocr_result(base_url, track_id, headers, attempts=30, interval=10, timeout=30):
    """Poll the track-status endpoint until the document is processed or fails.

    Args:
        base_url: Root URL of the server, without a trailing slash.
        track_id: Tracking ID returned by the upload endpoint.
        headers: HTTP headers sent with every status request.
        attempts: Maximum number of polls before giving up.
        interval: Seconds to sleep before each poll.
        timeout: Per-request timeout in seconds.

    Returns:
        True when the first tracked document reports ``"PROCESSED"``;
        False when it reports ``"FAILED"`` or no final status is seen
        within ``attempts`` polls.
    """
    status_url = f"{base_url}/documents/track_status/{track_id}"

    for _ in range(attempts):
        time.sleep(interval)

        # A single failed poll should not abort the whole wait; report it
        # and try again on the next iteration.
        try:
            status_response = requests.get(status_url, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            print(f"Error checking status: {e}")
            continue

        if status_response.status_code != 200:
            print(f"Error checking status: {status_response.status_code}")
            continue

        documents = status_response.json().get('documents', [])
        if not documents:
            print("No documents found in track status")
            continue

        document = documents[0]
        doc_status = document.get('status')
        print(f"Document status: {doc_status}")

        if doc_status == "PROCESSED":
            print("OCR processing completed successfully!")
            print(f"Content summary: {document.get('content_summary')}")
            print(f"Content length: {document.get('content_length')}")
            return True
        if doc_status == "FAILED":
            print(f"OCR processing failed: {document.get('error_msg')}")
            return False

    print(f"Processing timeout - no final status after {attempts * interval} seconds")
    return False


def test_gpu_only_ocr():
    """Test OCR PDF upload with GPU-only processing.

    Checks that ``ocr.pdf`` exists in the current directory, probes the
    server's ``/documents/pipeline_status`` endpoint, uploads the PDF to
    ``/documents/upload`` and then polls ``/documents/track_status/<id>``
    every 10 seconds, up to 30 times (5 minutes).

    Returns:
        bool: True only when the first tracked document reaches status
        ``"PROCESSED"``. False when the PDF is missing, the server cannot
        be reached, the upload is rejected, no track ID is returned, the
        document reports ``"FAILED"``, or polling times out.
    """
    # Server configuration
    base_url = "http://localhost:3015"
    upload_url = f"{base_url}/documents/upload"

    # Try without authentication first
    headers = {}

    # File to upload
    ocr_pdf_path = "ocr.pdf"

    # requests never times out by default, so an unresponsive server would
    # hang this script forever; bound every call explicitly.
    status_timeout = 30
    upload_timeout = 120

    if not os.path.exists(ocr_pdf_path):
        print(f"Error: OCR PDF file not found at {ocr_pdf_path}")
        return False

    # First, let's check server status
    try:
        status_response = requests.get(
            f"{base_url}/documents/pipeline_status",
            headers=headers,
            timeout=status_timeout,
        )
    except Exception as e:
        print(f"Error connecting to server: {e}")
        return False

    if status_response.status_code == 200:
        print("Server is running and accessible")
    else:
        # A non-200 answer is reported but the upload is still attempted.
        print(f"Server status check failed: {status_response.status_code}")

    # Upload the OCR PDF
    print(f"Uploading OCR PDF: {ocr_pdf_path}")

    try:
        with open(ocr_pdf_path, 'rb') as file:
            files = {'file': (os.path.basename(ocr_pdf_path), file, 'application/pdf')}
            response = requests.post(
                upload_url, files=files, headers=headers, timeout=upload_timeout
            )

        if response.status_code != 200:
            print(f"Upload failed: {response.status_code} - {response.text}")
            return False

        result = response.json()
        print(f"Upload successful: {result}")
        track_id = result.get('track_id')
    except Exception as e:
        print(f"Error during upload: {e}")
        return False

    if not track_id:
        print("No track ID returned")
        return False

    print(f"Tracking ID: {track_id}")
    print("Waiting for processing to complete...")

    # Monitor processing status. Anything unexpected while polling (for
    # example a malformed JSON body) is reported as a monitoring error
    # rather than being mislabelled as an upload error.
    try:
        return _wait_for_ocr_result(
            base_url, track_id, headers, timeout=status_timeout
        )
    except Exception as e:
        print(f"Error while monitoring processing: {e}")
        return False
|
|
|
|
def check_server_logs():
    """Check server logs for OCR-related messages.

    If ``lightrag.log`` exists in the current directory, its contents are
    scanned for the markers ``PaddleOCR`` and ``GPU`` (case-sensitive) and
    ``fallback`` and ``error`` (case-insensitive), and a line is printed for
    each marker found. Afterwards the server's
    ``/documents/pipeline_status`` endpoint is queried and its JSON body is
    printed.

    This is a best-effort diagnostic: read and request failures are printed
    instead of raised. Returns None.
    """
    log_file = "lightrag.log"
    if os.path.exists(log_file):
        print(f"\nChecking server logs in {log_file}...")
        try:
            # errors='replace' keeps a single undecodable byte in the log
            # from raising UnicodeDecodeError and aborting the diagnostic.
            with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
                logs = f.read()
        except OSError as e:
            print(f"Error reading {log_file}: {e}")
        else:
            # Lower-case once and reuse for both case-insensitive checks.
            logs_lower = logs.lower()

            # Look for OCR-related messages
            if "PaddleOCR" in logs:
                print("Found PaddleOCR references in logs")
            if "GPU" in logs:
                print("Found GPU references in logs")
            if "fallback" in logs_lower:
                print("WARNING: Found fallback references in logs")
            if "error" in logs_lower:
                print("Found error messages in logs")

    # Also check for any recent errors
    print("\nChecking for recent errors...")
    try:
        # requests has no default timeout; bound the call so a hung server
        # cannot block the diagnostic forever.
        response = requests.get(
            "http://localhost:3015/documents/pipeline_status", timeout=30
        )
        if response.status_code == 200:
            pipeline_status = response.json()
            print(f"Pipeline status: {pipeline_status}")
        else:
            print(f"Pipeline status check failed: {response.status_code}")
    except Exception as e:
        print(f"Error checking pipeline status: {e}")
|
|
|
|
# Script entry point: run the OCR upload test, print a summary, and report
# the outcome to the caller through the process exit status.
if __name__ == "__main__":
    print("Testing GPU-only OCR PDF upload...")
    print("=" * 50)

    success = test_gpu_only_ocr()

    print("\n" + "=" * 50)
    if success:
        print("✅ OCR PDF upload test completed successfully!")
    else:
        print("❌ OCR PDF upload test failed!")
        # On failure, scan the server log and pipeline status for clues.
        check_server_logs()

    # Exit non-zero on failure so shells and CI jobs can detect it; the
    # script previously always exited with status 0.
    sys.exit(0 if success else 1)