163 lines
6.1 KiB
Python
163 lines
6.1 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
import os
|
|
|
|
def test_ocr_upload_workflow():
    """Exercise OCR PDF upload, indexing, search and query without authentication.

    Runs six sequential checks against the server at ``http://localhost:3015``
    and prints progress to stdout:

    1. ``GET /health`` answers 200.
    2. ``GET /auth-status`` answers 200 and reports a falsy ``auth_configured``.
    3. ``POST /documents`` accepts ``ocr.pdf`` (opened relative to the current
       working directory) with a 200 response.
    4. ``GET /documents`` is polled until the first listed document reports
       status ``"completed"``; a ``"failed"`` status or an exhausted wait
       budget aborts the run.
    5. ``POST /search`` answers 200.
    6. ``POST /query`` answers 200.

    Every HTTP call carries an explicit timeout so an unresponsive server
    produces a reported failure instead of hanging the script forever.

    Returns:
        bool: ``True`` when every step succeeds, ``False`` as soon as one fails.

    NOTE(review): because the outcome is returned rather than asserted, a
    pytest run would presumably not mark this test as failed on ``False``;
    the boolean is what the script entry point consumes -- confirm how this
    file is meant to be executed.
    """
    base_url = "http://localhost:3015"

    # Seconds to wait for cheap status endpoints vs. endpoints that may do
    # real work server-side (upload, search, RAG query).
    short_timeout = 10
    long_timeout = 120

    print("Testing OCR PDF upload workflow without authentication...")

    # Test 1: Check server status
    print("\n1. Testing server status...")
    try:
        response = requests.get(f"{base_url}/health", timeout=short_timeout)
        if response.status_code != 200:
            print(f"✗ Server returned status: {response.status_code}")
            return False
        status_data = response.json()
        print(f"✓ Server is running - Status: {status_data.get('status')}")
        print(f" Auth mode: {status_data.get('auth_mode')}")
    except Exception as e:
        print(f"✗ Cannot connect to server: {e}")
        return False

    # Test 2: Check authentication status
    print("\n2. Testing authentication status...")
    try:
        response = requests.get(f"{base_url}/auth-status", timeout=short_timeout)
        if response.status_code != 200:
            print(f"✗ Auth status check failed: {response.status_code}")
            return False
        auth_data = response.json()
        print(f"✓ Auth status: {auth_data.get('auth_configured')}")
        print(f" Auth mode: {auth_data.get('auth_mode')}")
        if auth_data.get('auth_configured'):
            print("✗ Authentication is still enabled!")
            return False
        print("✓ Authentication is disabled - guest access enabled")
    except Exception as e:
        print(f"✗ Auth status check failed: {e}")
        return False

    # Test 3: Upload OCR PDF file
    print("\n3. Uploading OCR PDF file...")
    try:
        with open("ocr.pdf", "rb") as file:
            files = {"file": ("ocr.pdf", file, "application/pdf")}
            response = requests.post(
                f"{base_url}/documents", files=files, timeout=long_timeout
            )

        if response.status_code != 200:
            print(f"✗ Upload failed: {response.status_code} - {response.text}")
            return False
        upload_data = response.json()
        print("✓ File uploaded successfully")
        print(f" Document ID: {upload_data.get('document_id')}")
        print(f" Status: {upload_data.get('status')}")
    except Exception as e:
        print(f"✗ Upload failed: {e}")
        return False

    # Test 4: Monitor indexing progress
    print("\n4. Monitoring indexing progress...")
    max_wait_time = 120  # 2 minutes max
    wait_interval = 5
    # A monotonic clock makes the budget cover time spent inside requests too,
    # not only the sleeps between polls.
    started_at = time.monotonic()

    while True:
        elapsed_time = int(time.monotonic() - started_at)
        if elapsed_time >= max_wait_time:
            print("✗ Indexing timeout reached")
            return False

        try:
            response = requests.get(f"{base_url}/documents", timeout=short_timeout)
            if response.status_code == 200:
                docs_data = response.json()
                if docs_data:
                    # NOTE(review): assumes the first entry is the document
                    # uploaded above -- confirm the listing order, or match on
                    # the returned document ID once the schema is known.
                    latest_doc = docs_data[0]
                    status = latest_doc.get('status')
                    print(f" Current status: {status} (waited {elapsed_time}s)")

                    if status == "completed":
                        print("✓ Indexing completed successfully!")
                        break
                    if status == "failed":
                        print("✗ Indexing failed!")
                        return False
                else:
                    print(" No documents found")
            else:
                print(f" Failed to get document status: {response.status_code}")
        except Exception as e:
            # Transient errors are reported and retried until the budget ends.
            print(f" Error checking status: {e}")

        time.sleep(wait_interval)

    # Test 5: Test search functionality
    print("\n5. Testing search functionality...")
    try:
        search_query = "document text content"
        search_data = {
            "query": search_query,
            "top_k": 5,
        }

        response = requests.post(
            f"{base_url}/search", json=search_data, timeout=long_timeout
        )
        if response.status_code != 200:
            print(f"✗ Search failed: {response.status_code} - {response.text}")
            return False
        search_results = response.json()
        print("✓ Search successful")
        print(f" Found {len(search_results.get('results', []))} results")

        # Display first result if available
        if search_results.get('results'):
            first_result = search_results['results'][0]
            print(f" First result score: {first_result.get('score')}")
            print(f" First result content preview: {first_result.get('content', '')[:100]}...")
        else:
            print(" No search results returned")
    except Exception as e:
        print(f"✗ Search test failed: {e}")
        return False

    # Test 6: Test query endpoint (RAG functionality)
    print("\n6. Testing RAG query functionality...")
    try:
        query_data = {
            "query": "What is this document about?",
            "top_k": 3,
        }

        response = requests.post(
            f"{base_url}/query", json=query_data, timeout=long_timeout
        )
        if response.status_code != 200:
            print(f"✗ Query failed: {response.status_code} - {response.text}")
            return False
        query_result = response.json()
        print("✓ Query successful")
        print(f" Response: {query_result.get('response', '')[:200]}...")
        print(f" Sources: {len(query_result.get('sources', []))}")
    except Exception as e:
        print(f"✗ Query test failed: {e}")
        return False

    print("\n🎉 All tests passed! OCR PDF upload, indexing, and search workflow is working correctly without authentication.")
    return True
|
|
|
|
# Script entry point: run the workflow once and signal failure to the shell
# through a non-zero exit status.
if __name__ == "__main__":
    success = test_ocr_upload_workflow()
    if not success:
        print("\n❌ Some tests failed. Check the server status and configuration.")
        # SystemExit is used instead of the `exit` helper, which is injected
        # by the `site` module for interactive sessions and is not guaranteed
        # to exist (e.g. under `python -S` or in frozen builds).
        raise SystemExit(1)