"""Manual smoke test: upload ``ocr.pdf`` to a local server and report the outcome.

The script talks to the server at ``BASE_URL``, uploads the PDF, polls the
track-status endpoint until the document is processed or fails, and finally
prints a few hints gathered from the server log and the pipeline status.
"""

import requests
import json
import time
import os

# Address of the server under test; shared by every request in this script.
BASE_URL = "http://localhost:3015"

# Seconds to wait for a single status/poll request. requests applies no
# timeout by default, so without this a stalled server would hang the script.
REQUEST_TIMEOUT = 30

# The upload carries the whole PDF, so it gets a more generous limit.
UPLOAD_TIMEOUT = 300

# Polling schedule: 30 attempts, 10 s apart = 5 minutes of waiting.
POLL_ATTEMPTS = 30
POLL_INTERVAL_SECONDS = 10


def _wait_for_processing(track_id, headers):
    """Poll the track-status endpoint until the document finishes.

    Args:
        track_id: Identifier returned by the upload endpoint.
        headers: HTTP headers sent with every status request.

    Returns:
        True when the first tracked document reports ``"PROCESSED"``;
        False when it reports ``"FAILED"`` or the polling budget runs out.

    Raises:
        Whatever ``requests`` or JSON decoding raises; the caller handles it.
    """
    status_url = f"{BASE_URL}/documents/track_status/{track_id}"

    for _ in range(POLL_ATTEMPTS):
        time.sleep(POLL_INTERVAL_SECONDS)

        status_response = requests.get(
            status_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        if status_response.status_code != 200:
            # Non-200 is treated as transient: report it and try again.
            print(f"Error checking status: {status_response.status_code}")
            continue

        documents = status_response.json().get('documents', [])
        if not documents:
            print("No documents found in track status")
            continue

        # Only the first document of the track is inspected.
        document = documents[0]
        doc_status = document.get('status')
        print(f"Document status: {doc_status}")

        if doc_status == "PROCESSED":
            print("OCR processing completed successfully!")
            print(f"Content summary: {document.get('content_summary')}")
            print(f"Content length: {document.get('content_length')}")
            return True
        if doc_status == "FAILED":
            print(f"OCR processing failed: {document.get('error_msg')}")
            return False
        # Any other status means "still in progress": keep polling.

    print("Processing timeout - checking final status...")
    return False


def test_gpu_only_ocr():
    """Upload ``ocr.pdf`` and wait for the server to finish processing it.

    NOTE(review): nothing in this script selects GPU-only mode; that is
    presumably a server-side setting - confirm against the server config.

    Returns:
        True if the document reached the ``"PROCESSED"`` status, False on any
        failure (missing file, unreachable server, rejected upload, missing
        track id, ``"FAILED"`` status, or polling timeout).
    """
    upload_url = f"{BASE_URL}/documents/upload"

    # Try without authentication first
    headers = {}

    # File to upload
    ocr_pdf_path = "ocr.pdf"

    if not os.path.exists(ocr_pdf_path):
        print(f"Error: OCR PDF file not found at {ocr_pdf_path}")
        return False

    # First, check that the server answers at all. A non-200 answer is only
    # reported; just a connection-level failure aborts the test.
    try:
        status_response = requests.get(
            f"{BASE_URL}/documents/pipeline_status",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if status_response.status_code == 200:
            print("Server is running and accessible")
        else:
            print(f"Server status check failed: {status_response.status_code}")
    except Exception as e:
        print(f"Error connecting to server: {e}")
        return False

    # Upload the OCR PDF
    print(f"Uploading OCR PDF: {ocr_pdf_path}")

    try:
        with open(ocr_pdf_path, 'rb') as file:
            files = {
                'file': (os.path.basename(ocr_pdf_path), file, 'application/pdf')
            }
            response = requests.post(
                upload_url, files=files, headers=headers, timeout=UPLOAD_TIMEOUT
            )

        if response.status_code != 200:
            print(f"Upload failed: {response.status_code} - {response.text}")
            return False

        result = response.json()
        print(f"Upload successful: {result}")

        track_id = result.get('track_id')
        if not track_id:
            print("No track ID returned")
            return False

        print(f"Tracking ID: {track_id}")
        print("Waiting for processing to complete...")

        # Monitor processing status
        return _wait_for_processing(track_id, headers)

    except Exception as e:
        # Top-level boundary of the test: any I/O or decoding error from the
        # upload or the polling above is reported and counted as a failure.
        print(f"Error during upload: {e}")
        return False


def check_server_logs():
    """Print OCR-related hints from the server log and the pipeline status.

    Scans ``lightrag.log`` (if it exists in the working directory) for a few
    keywords, then fetches and prints the server's pipeline status. Purely
    diagnostic: returns None and never raises on a request failure.
    """
    log_file = "lightrag.log"

    if os.path.exists(log_file):
        print(f"\nChecking server logs in {log_file}...")
        # errors='replace': one undecodable byte in the log must not abort
        # the diagnostics with a UnicodeDecodeError.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            logs = f.read()

        # Lower-case once for the case-insensitive checks below.
        logs_lower = logs.lower()

        # Look for OCR-related messages
        if "PaddleOCR" in logs:
            print("Found PaddleOCR references in logs")
        if "GPU" in logs:
            print("Found GPU references in logs")
        if "fallback" in logs_lower:
            print("WARNING: Found fallback references in logs")
        if "error" in logs_lower:
            print("Found error messages in logs")

    # Also check for any recent errors
    print("\nChecking for recent errors...")
    try:
        response = requests.get(
            f"{BASE_URL}/documents/pipeline_status", timeout=REQUEST_TIMEOUT
        )
        if response.status_code == 200:
            pipeline_status = response.json()
            print(f"Pipeline status: {pipeline_status}")
    except Exception as e:
        print(f"Error checking pipeline status: {e}")


if __name__ == "__main__":
    print("Testing GPU-only OCR PDF upload...")
    print("=" * 50)

    success = test_gpu_only_ocr()

    print("\n" + "=" * 50)
    if success:
        print("✅ OCR PDF upload test completed successfully!")
    else:
        print("❌ OCR PDF upload test failed!")

    check_server_logs()