157 lines
5.6 KiB
Python
157 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
End-to-end smoke test for a running LightRAG server: uploads ocr.pdf, then
verifies indexing, search, and web UI accessibility.
"""
|
|
|
|
import requests
|
|
import json
|
|
import time
|
|
import sys
|
|
|
|
def _check_health(base_url: str, timeout: float) -> bool:
    """Step 1: return True when ``GET {base_url}/health`` answers 200 with JSON."""
    print("\n1. Checking server health...")
    # Broad catch on purpose: this is a diagnostic script and the response
    # schema is not validated, so any failure is reported as a failed step.
    try:
        health_response = requests.get(f"{base_url}/health", timeout=timeout)
        if health_response.status_code != 200:
            print(f" ❌ Server health check failed: {health_response.status_code}")
            return False
        print(" ✅ Server is healthy")
        print(f" Status: {health_response.json().get('status', 'unknown')}")
        return True
    except Exception as e:
        print(f" ❌ Server health check error: {e}")
        return False


def _upload_pdf(base_url: str, headers: dict, timeout: float) -> bool:
    """Step 2: POST ``ocr.pdf`` from the working directory to ``/api/upload``."""
    print("\n2. Uploading ocr.pdf...")
    try:
        with open("ocr.pdf", "rb") as f:
            upload_response = requests.post(
                f"{base_url}/api/upload",
                files={"file": ("ocr.pdf", f, "application/pdf")},
                headers=headers,
                timeout=timeout,
            )
        if upload_response.status_code != 200:
            print(f" ❌ Upload failed: {upload_response.status_code}")
            print(f" Response: {upload_response.text}")
            return False
        print(" ✅ File uploaded successfully")
        upload_result = upload_response.json()
        print(f" Response: {upload_result}")
        return True
    except Exception as e:
        print(f" ❌ Upload error: {e}")
        return False


def _wait_for_indexing(
    base_url: str,
    headers: dict,
    timeout: float,
    max_wait_time: float = 120,
    wait_interval: float = 5,
) -> str:
    """Step 3: poll ``/api/documents`` until the first document is indexed.

    Returns ``"processed"``, ``"failed"`` or ``"timeout"``. Transient errors
    (non-200 answers, exceptions, empty listings) are printed and retried
    until ``max_wait_time`` seconds of wall-clock time have passed.
    """
    print("\n3. Waiting for indexing to complete...")
    # Monotonic clock so time spent inside slow requests counts toward the
    # budget, not only the sleeps between polls.
    started = time.monotonic()
    deadline = started + max_wait_time
    while time.monotonic() < deadline:
        elapsed_time = int(time.monotonic() - started)
        try:
            status_response = requests.get(
                f"{base_url}/api/documents", headers=headers, timeout=timeout
            )
            if status_response.status_code == 200:
                documents = status_response.json()
                if documents:
                    # NOTE(review): assumes the listing is a list of dicts and
                    # that its first entry is the file just uploaded - confirm
                    # against the server's API.
                    doc_status = documents[0].get("status", "unknown")
                    print(f" Document status: {doc_status} (elapsed: {elapsed_time}s)")
                    if doc_status == "PROCESSED":
                        print(" ✅ Indexing completed successfully")
                        return "processed"
                    if doc_status == "FAILED":
                        print(" ❌ Indexing failed")
                        return "failed"
                else:
                    print(" No documents found, waiting...")
            else:
                print(f" Status check failed: {status_response.status_code}")
        except Exception as e:
            print(f" Status check error: {e}")
        time.sleep(wait_interval)

    print(" ⚠️ Indexing timeout - proceeding with search test")
    return "timeout"


def _run_search_queries(base_url: str, headers: dict, timeout: float) -> bool:
    """Step 4: return True as soon as one canned query yields a non-empty result."""
    print("\n4. Testing search functionality...")
    test_queries = [
        "artificial intelligence",
        "machine learning",
        "neural networks",
        "data science",
    ]
    for query in test_queries:
        try:
            # json= already sets the Content-Type header to application/json.
            search_response = requests.post(
                f"{base_url}/api/search",
                json={"query": query, "top_k": 5},
                headers=headers,
                timeout=timeout,
            )
            if search_response.status_code != 200:
                print(f" ❌ Search failed for '{query}': {search_response.status_code}")
                continue
            search_results = search_response.json()
            if not search_results:
                print(f" ⚠️ No results for '{query}'")
                continue
            print(f" ✅ Search successful for '{query}': Found {len(search_results)} results")
        except Exception as e:
            print(f" ❌ Search error for '{query}': {e}")
            continue

        # The preview is cosmetic: an unexpected result shape must not be
        # reported as a search error once results have been received.
        try:
            first_result = search_results[0]
            content_preview = first_result.get('content', '')[:100] + "..."
            print(f" First result preview: {content_preview}")
        except Exception as e:
            print(f" ⚠️ Could not preview first result: {e}")
        return True
    return False


def _check_web_ui(base_url: str, timeout: float) -> bool:
    """Step 5: return True when ``GET {base_url}/webui/`` answers 200."""
    print("\n5. Testing web UI accessibility...")
    try:
        webui_response = requests.get(f"{base_url}/webui/", timeout=timeout)
    except Exception as e:
        print(f" ❌ Web UI access error: {e}")
        return False
    if webui_response.status_code == 200:
        print(" ✅ Web UI is accessible")
        return True
    print(f" ⚠️ Web UI returned status: {webui_response.status_code}")
    return False


def test_ocr_pdf_workflow(
    base_url: str = "http://localhost:3015",
    api_key: str = "jleu1212",
    request_timeout: float = 60,
) -> bool:
    """Test complete OCR PDF workflow: upload, indexing, and search.

    Runs five steps against the server at ``base_url``: health check, upload
    of ``ocr.pdf``, wait for indexing, search queries, web UI check. Progress
    is printed to stdout.

    Args:
        base_url: Root URL of the server under test.
        api_key: Value sent in the ``X-API-Key`` header of authenticated calls.
        request_timeout: Seconds allowed for each HTTP request, so a stalled
            server cannot hang the script indefinitely.

    Returns:
        True when at least one search query returned results. False when the
        health check, the upload or indexing failed, or when no query returned
        results. An indexing timeout or an unreachable web UI only produces a
        warning and is reflected in the printed summary.
    """
    print("=== Testing LightRAG OCR PDF Workflow ===")
    auth_headers = {"X-API-Key": api_key}

    if not _check_health(base_url, request_timeout):
        return False
    if not _upload_pdf(base_url, auth_headers, request_timeout):
        return False

    indexing_state = _wait_for_indexing(base_url, auth_headers, request_timeout)
    if indexing_state == "failed":
        return False

    search_success = _run_search_queries(base_url, auth_headers, request_timeout)
    webui_ok = _check_web_ui(base_url, request_timeout)

    # Final summary: report what actually happened for each step instead of
    # printing a check mark for steps that only produced a warning.
    print("\n=== Test Summary ===")
    if not search_success:
        print("❌ FAILURE: Some tests failed")
        return False

    indexing_mark = "✓" if indexing_state == "processed" else "⚠️ not confirmed (timed out)"
    webui_mark = "✓" if webui_ok else "⚠️ not accessible"
    print("✅ SUCCESS: OCR PDF workflow is working correctly!")
    print(" - File upload: ✓")
    print(f" - Indexing: {indexing_mark}")
    print(" - Search: ✓")
    print(f" - Web UI: {webui_mark}")
    return True
|
|
|
|
if __name__ == "__main__":
    # Map the workflow result onto the process exit status: 0 on success,
    # 1 on failure, so shells and CI jobs can react to it.
    sys.exit(0 if test_ocr_pdf_workflow() else 1)