# File: railseek6/test_ocr_upload_no_auth.py
# (Listing metadata captured with the source: 163 lines, 6.1 KiB, Python)

import requests
import json
import time
import os
def _check_server_status(base_url, request_timeout):
    """Step 1: GET ``/health`` and print the reported status; True on HTTP 200."""
    print("\n1. Testing server status...")
    try:
        response = requests.get(f"{base_url}/health", timeout=request_timeout)
        if response.status_code != 200:
            print(f"✗ Server returned status: {response.status_code}")
            return False
        status_data = response.json()
        print(f"✓ Server is running - Status: {status_data.get('status')}")
        print(f" Auth mode: {status_data.get('auth_mode')}")
    except Exception as e:
        # Broad on purpose: any failure in this step (connection, timeout,
        # malformed JSON) is reported and fails the workflow, never crashes it.
        print(f"✗ Cannot connect to server: {e}")
        return False
    return True


def _check_auth_disabled(base_url, request_timeout):
    """Step 2: GET ``/auth-status``; True only if ``auth_configured`` is falsy."""
    print("\n2. Testing authentication status...")
    try:
        response = requests.get(f"{base_url}/auth-status", timeout=request_timeout)
        if response.status_code != 200:
            print(f"✗ Auth status check failed: {response.status_code}")
            return False
        auth_data = response.json()
        print(f"✓ Auth status: {auth_data.get('auth_configured')}")
        print(f" Auth mode: {auth_data.get('auth_mode')}")
        if auth_data.get('auth_configured'):
            print("✗ Authentication is still enabled!")
            return False
        print("✓ Authentication is disabled - guest access enabled")
    except Exception as e:
        print(f"✗ Auth status check failed: {e}")
        return False
    return True


def _upload_pdf(base_url, pdf_path, request_timeout):
    """Step 3: POST the PDF at *pdf_path* to ``/documents``; True on HTTP 200."""
    print("\n3. Uploading OCR PDF file...")
    try:
        with open(pdf_path, "rb") as file:
            # The multipart filename is the path's base name, which is
            # "ocr.pdf" for the default path.
            files = {"file": (os.path.basename(pdf_path), file, "application/pdf")}
            response = requests.post(
                f"{base_url}/documents", files=files, timeout=request_timeout
            )
        if response.status_code != 200:
            print(f"✗ Upload failed: {response.status_code} - {response.text}")
            return False
        upload_data = response.json()
        print("✓ File uploaded successfully")
        print(f" Document ID: {upload_data.get('document_id')}")
        print(f" Status: {upload_data.get('status')}")
    except Exception as e:
        print(f"✗ Upload failed: {e}")
        return False
    return True


def _wait_for_indexing(base_url, request_timeout, max_wait_time, wait_interval):
    """Step 4: poll ``/documents`` until the first entry is completed or failed.

    Returns True when the status is ``"completed"``, False when it is
    ``"failed"`` or when *max_wait_time* seconds of waiting have accumulated.
    Errors while polling are printed and the poll is retried.
    """
    print("\n4. Monitoring indexing progress...")
    elapsed_time = 0
    while elapsed_time < max_wait_time:
        try:
            response = requests.get(f"{base_url}/documents", timeout=request_timeout)
            if response.status_code == 200:
                docs_data = response.json()
                if docs_data:
                    # NOTE(review): assumes the response is a list whose first
                    # entry is the document just uploaded - confirm against
                    # the server's /documents schema.
                    latest_doc = docs_data[0]
                    status = latest_doc.get('status')
                    print(f" Current status: {status} (waited {elapsed_time}s)")
                    if status == "completed":
                        print("✓ Indexing completed successfully!")
                        return True
                    if status == "failed":
                        print("✗ Indexing failed!")
                        return False
                else:
                    print(" No documents found")
            else:
                print(f" Failed to get document status: {response.status_code}")
        except Exception as e:
            print(f" Error checking status: {e}")
        # Single sleep/accounting point shared by the success and error paths.
        time.sleep(wait_interval)
        elapsed_time += wait_interval
    print("✗ Indexing timeout reached")
    return False


def _check_search(base_url, request_timeout):
    """Step 5: POST a fixed query to ``/search``; True on HTTP 200."""
    print("\n5. Testing search functionality...")
    try:
        search_query = "document text content"
        search_data = {
            "query": search_query,
            "top_k": 5,
        }
        response = requests.post(
            f"{base_url}/search", json=search_data, timeout=request_timeout
        )
        if response.status_code != 200:
            print(f"✗ Search failed: {response.status_code} - {response.text}")
            return False
        search_results = response.json()
        print("✓ Search successful")
        print(f" Found {len(search_results.get('results', []))} results")
        # Display first result if available
        if search_results.get('results'):
            first_result = search_results['results'][0]
            print(f" First result score: {first_result.get('score')}")
            print(f" First result content preview: {first_result.get('content', '')[:100]}...")
        else:
            print(" No search results returned")
    except Exception as e:
        print(f"✗ Search test failed: {e}")
        return False
    return True


def _check_rag_query(base_url, request_timeout):
    """Step 6: POST a fixed question to ``/query``; True on HTTP 200."""
    print("\n6. Testing RAG query functionality...")
    try:
        query_data = {
            "query": "What is this document about?",
            "top_k": 3,
        }
        response = requests.post(
            f"{base_url}/query", json=query_data, timeout=request_timeout
        )
        if response.status_code != 200:
            print(f"✗ Query failed: {response.status_code} - {response.text}")
            return False
        query_result = response.json()
        print("✓ Query successful")
        print(f" Response: {query_result.get('response', '')[:200]}...")
        print(f" Sources: {len(query_result.get('sources', []))}")
    except Exception as e:
        print(f"✗ Query test failed: {e}")
        return False
    return True


def test_ocr_upload_workflow(
    *,
    base_url="http://localhost:3015",
    pdf_path="ocr.pdf",
    max_wait_time=120,
    wait_interval=5,
    request_timeout=120,
):
    """Test OCR PDF upload, indexing, and search without authentication.

    Runs six steps in order against the server at *base_url* and stops at the
    first one that fails: health check, auth-status check, PDF upload,
    indexing poll, search, and RAG query. Progress is printed to stdout.

    Args:
        base_url: Root URL of the server under test.
        pdf_path: Path of the PDF file to upload.
        max_wait_time: Upper bound, in seconds of accumulated sleep, on the
            indexing poll.
        wait_interval: Seconds to sleep between indexing polls; must be
            positive, otherwise the poll could never reach its limit.
        request_timeout: Timeout in seconds passed to every HTTP request so a
            stalled server cannot hang the run indefinitely.

    Returns:
        True if every step succeeded, False otherwise.

    Raises:
        ValueError: If *wait_interval* is not positive.
    """
    if wait_interval <= 0:
        raise ValueError("wait_interval must be positive")

    print("Testing OCR PDF upload workflow without authentication...")

    # ``and`` short-circuits, so later steps are skipped after a failure.
    all_steps_passed = (
        _check_server_status(base_url, request_timeout)
        and _check_auth_disabled(base_url, request_timeout)
        and _upload_pdf(base_url, pdf_path, request_timeout)
        and _wait_for_indexing(base_url, request_timeout, max_wait_time, wait_interval)
        and _check_search(base_url, request_timeout)
        and _check_rag_query(base_url, request_timeout)
    )
    if not all_steps_passed:
        return False

    print("\n🎉 All tests passed! OCR PDF upload, indexing, and search workflow is working correctly without authentication.")
    return True
if __name__ == "__main__":
    # Script entry point: run the workflow and signal failure to the caller
    # through a non-zero exit status.
    import sys

    success = test_ocr_upload_workflow()
    if not success:
        print("\n❌ Some tests failed. Check the server status and configuration.")
        # sys.exit rather than the ``exit`` builtin, which is injected by the
        # ``site`` module and is not guaranteed to exist in every interpreter.
        sys.exit(1)