#!/usr/bin/env python3
"""
Final test script to upload ocr.pdf and verify upload, indexing, and search functionality
"""

import json
import os
import sys
import time

import requests

DEFAULT_BASE_URL = "http://localhost:3015"
DEFAULT_API_KEY = "jleu1212"
DEFAULT_PDF_PATH = "ocr.pdf"

# Without an explicit timeout `requests` waits forever on an unresponsive
# server, which would hang the whole script (including the polling loop).
REQUEST_TIMEOUT = 30  # seconds, for the small JSON/HTML requests
UPLOAD_TIMEOUT = 300  # seconds, the upload may take noticeably longer

# Outcomes of the indexing wait.
_INDEX_PROCESSED = "processed"
_INDEX_FAILED = "failed"
_INDEX_TIMEOUT = "timeout"


def _check_health(base_url: str) -> bool:
    """Return True when ``GET {base_url}/health`` answers with HTTP 200."""
    print("\n1. Checking server health...")
    # Broad catch on purpose: any failure of this step (connection error,
    # non-JSON body, unexpected payload shape) must be reported as a failed
    # step, not abort the script with a traceback.
    try:
        response = requests.get(f"{base_url}/health", timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f" ❌ Server health check failed: {response.status_code}")
            return False
        print(" ✅ Server is healthy")
        print(f" Status: {response.json().get('status', 'unknown')}")
        return True
    except Exception as e:
        print(f" ❌ Server health check error: {e}")
        return False


def _upload_pdf(base_url: str, api_key: str, pdf_path: str) -> bool:
    """POST the PDF to ``/api/upload``; return True on HTTP 200."""
    file_name = os.path.basename(pdf_path)
    print(f"\n2. Uploading {file_name}...")
    try:
        with open(pdf_path, "rb") as f:
            response = requests.post(
                f"{base_url}/api/upload",
                files={"file": (file_name, f, "application/pdf")},
                headers={"X-API-Key": api_key},
                timeout=UPLOAD_TIMEOUT,
            )
        if response.status_code != 200:
            print(f" ❌ Upload failed: {response.status_code}")
            print(f" Response: {response.text}")
            return False
        print(" ✅ File uploaded successfully")
        print(f" Response: {response.json()}")
        return True
    except Exception as e:
        print(f" ❌ Upload error: {e}")
        return False


def _wait_for_indexing(base_url: str, api_key: str,
                       max_wait_time: float = 120,
                       wait_interval: float = 5) -> str:
    """Poll ``/api/documents`` until the first document is done.

    Returns one of ``_INDEX_PROCESSED``, ``_INDEX_FAILED`` or
    ``_INDEX_TIMEOUT``. Errors while polling are printed and retried until
    the time budget is used up.
    """
    print("\n3. Waiting for indexing to complete...")
    headers = {"X-API-Key": api_key}
    # Use a monotonic clock so time spent inside slow requests counts
    # towards the budget, not only the sleeps between polls.
    started = time.monotonic()
    deadline = started + max_wait_time

    while True:
        elapsed_time = int(time.monotonic() - started)
        try:
            response = requests.get(
                f"{base_url}/api/documents",
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            if response.status_code == 200:
                documents = response.json()
                if documents:
                    # NOTE(review): assumes the uploaded file is the first
                    # entry of the returned list - confirm against the API.
                    doc_status = documents[0].get("status", "unknown")
                    print(f" Document status: {doc_status} (elapsed: {elapsed_time}s)")
                    if doc_status == "PROCESSED":
                        print(" ✅ Indexing completed successfully")
                        return _INDEX_PROCESSED
                    if doc_status == "FAILED":
                        print(" ❌ Indexing failed")
                        return _INDEX_FAILED
                else:
                    print(" No documents found, waiting...")
            else:
                print(f" Status check failed: {response.status_code}")
        except Exception as e:
            # Best-effort polling: report and try again on the next round.
            print(f" Status check error: {e}")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(wait_interval, remaining))

    print(" ⚠️ Indexing timeout - proceeding with search test")
    return _INDEX_TIMEOUT


def _result_preview(search_results, limit: int = 100) -> str:
    """Return a short preview of the first hit, or "" if it has no usable shape."""
    if not isinstance(search_results, list) or not search_results:
        return ""
    first_result = search_results[0]
    if not isinstance(first_result, dict):
        return ""
    return str(first_result.get("content") or "")[:limit] + "..."


def _run_search(base_url: str, api_key: str, queries) -> bool:
    """Try each query against ``/api/search``; return True on the first hit."""
    print("\n4. Testing search functionality...")
    headers = {"X-API-Key": api_key, "Content-Type": "application/json"}
    for query in queries:
        try:
            response = requests.post(
                f"{base_url}/api/search",
                json={"query": query, "top_k": 5},
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                print(f" ❌ Search failed for '{query}': {response.status_code}")
                continue
            search_results = response.json()
            if not search_results:
                print(f" ⚠️ No results for '{query}'")
                continue
            print(f" ✅ Search successful for '{query}': Found {len(search_results)} results")
            # The preview is informational only; an unexpected payload shape
            # must not turn an already successful search into an error.
            content_preview = _result_preview(search_results)
            if content_preview:
                print(f" First result preview: {content_preview}")
            return True
        except Exception as e:
            print(f" ❌ Search error for '{query}': {e}")
    return False


def _check_web_ui(base_url: str) -> bool:
    """Return True when ``GET {base_url}/webui/`` answers with HTTP 200."""
    print("\n5. Testing web UI accessibility...")
    try:
        response = requests.get(f"{base_url}/webui/", timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            print(" ✅ Web UI is accessible")
            return True
        print(f" ⚠️ Web UI returned status: {response.status_code}")
    except Exception as e:
        print(f" ❌ Web UI access error: {e}")
    return False


def test_ocr_pdf_workflow(base_url: str = DEFAULT_BASE_URL,
                          api_key: str = DEFAULT_API_KEY,
                          pdf_path: str = DEFAULT_PDF_PATH) -> bool:
    """Test complete OCR PDF workflow: upload, indexing, and search.

    Steps: health check, upload of ``pdf_path``, wait for indexing, search
    with a few sample queries, web UI check.

    Args:
        base_url: Root URL of the server under test.
        api_key: Value sent in the ``X-API-Key`` header.
        pdf_path: Path of the PDF file to upload.

    Returns:
        True when health check and upload succeed, indexing does not report
        ``FAILED``, and at least one search query returns results. An
        indexing timeout or an unreachable web UI is reported in the summary
        but does not fail the run.
    """
    print("=== Testing LightRAG OCR PDF Workflow ===")

    if not _check_health(base_url):
        return False
    if not _upload_pdf(base_url, api_key, pdf_path):
        return False

    indexing = _wait_for_indexing(base_url, api_key)
    if indexing == _INDEX_FAILED:
        return False

    test_queries = [
        "artificial intelligence",
        "machine learning",
        "neural networks",
        "data science",
    ]
    search_success = _run_search(base_url, api_key, test_queries)
    web_ui_ok = _check_web_ui(base_url)

    # Final summary: report what each step actually did instead of
    # unconditionally ticking every box.
    print("\n=== Test Summary ===")
    if search_success:
        print("✅ SUCCESS: OCR PDF workflow is working correctly!")
    else:
        print("❌ FAILURE: Some tests failed")
    print(" - File upload: ✓")
    print(f" - Indexing: {'✓' if indexing == _INDEX_PROCESSED else '⚠️ timed out'}")
    print(f" - Search: {'✓' if search_success else '✗'}")
    print(f" - Web UI: {'✓' if web_ui_ok else '⚠️ not accessible'}")
    return search_success


if __name__ == "__main__":
    success = test_ocr_pdf_workflow()
    sys.exit(0 if success else 1)