# File: railseek6/test_ocr_upload_and_search.py
#
# Listing metadata (from the file viewer this was copied from):
#   102 lines
#   3.7 KiB
#   Python

import requests
import json
import time
def login_and_get_token(base_url, username="jleu3482", password="jleu1212", timeout=30):
    """Log in to the server and build the authorization header.

    Args:
        base_url: Root URL of the server; the request is sent to
            ``{base_url}/login`` as a URL-encoded form.
        username: Account name sent in the form body.
        password: Password sent in the form body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``{"Authorization": "Bearer <token>"}`` on success, or ``None`` when
        the request cannot be completed, the server answers with a non-200
        status, or the response carries no usable ``access_token``.
    """
    # NOTE(review): the default credentials are committed in source. They are
    # kept as defaults so existing callers keep working, but they should be
    # supplied by the caller (or the environment) instead.
    login_data = {"username": username, "password": password}

    print("Logging in...")
    try:
        login_response = requests.post(
            f"{base_url}/login",
            data=login_data,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            # Without a timeout requests waits indefinitely on a stalled server.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection refused, DNS failure, timeout, ... -> report, don't crash.
        print(f"❌ Login failed: {exc}")
        return None

    if login_response.status_code != 200:
        print(f"❌ Login failed: {login_response.status_code}")
        print(f"Response: {login_response.text}")
        return None

    try:
        payload = login_response.json()
    except ValueError:
        # Body was not JSON (requests' JSON decode errors subclass ValueError).
        payload = None

    token = payload.get("access_token") if isinstance(payload, dict) else None
    if not token:
        # Avoid handing callers a useless "Bearer None" header.
        print("❌ Login failed: response did not contain an access token")
        print(f"Response: {login_response.text}")
        return None

    print("✅ Login successful")
    return {"Authorization": f"Bearer {token}"}
def _check_server_health(base_url, headers, timeout=60):
    """Query ``/health`` and print the reported LLM and embedding settings.

    Returns:
        ``True`` when the server answered with HTTP 200, ``False`` when the
        request failed or returned any other status.
    """
    print("\nTesting server health...")
    try:
        health_response = requests.get(
            f"{base_url}/health", headers=headers, timeout=timeout
        )
    except requests.RequestException as e:
        print(f"❌ Server health check failed: {e}")
        return False

    if health_response.status_code != 200:
        print(f"❌ Server health check failed: {health_response.status_code}")
        return False

    print("✅ Server is healthy")
    try:
        health_data = health_response.json()
    except ValueError:
        # A 200 with a non-JSON body: treat the configuration as unknown.
        health_data = None

    # Tolerate a payload without the expected "configuration" mapping rather
    # than aborting the whole run with KeyError/TypeError.
    configuration = (
        health_data.get("configuration") if isinstance(health_data, dict) else None
    )
    if not isinstance(configuration, dict):
        configuration = {}

    llm_host = configuration.get("llm_binding_host")
    print(f"LLM Binding: {configuration.get('llm_binding')}")
    print(f"LLM Host: {llm_host}")
    print(f"Embedding Model: {configuration.get('embedding_model')}")

    # Check if DeepSeek configuration is correct
    if "deepseek.com" in str(llm_host or ""):
        print("✅ DeepSeek API configuration is correct")
    else:
        print("❌ DeepSeek API configuration is incorrect")
    return True


def _run_search(base_url, headers, timeout=60):
    """POST a fixed OCR-related query to ``/search`` and print the results."""
    print("\n=== Testing Search with OCR Content ===")
    search_data = {
        "query": "optical character recognition",
        "top_k": 3,
    }
    try:
        search_response = requests.post(
            f"{base_url}/search", json=search_data, headers=headers, timeout=timeout
        )
        print(f"Search response status: {search_response.status_code}")
        if search_response.status_code != 200:
            print(f"Search error: {search_response.text}")
            return

        search_results = search_response.json()
        print("✅ Search successful!")
        results = search_results.get('results', [])
        print(f"Found {len(results)} results")
        # Display results
        for i, result in enumerate(results, start=1):
            print(f"\nResult {i}:")
            print(f"  Score: {result.get('score', 0):.4f}")
            print(f"  Content: {result.get('content', '')[:200]}...")
            print(f"  Source: {result.get('source', '')}")
    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic step, and any
        # failure (network, bad JSON, unexpected payload shape) is reported
        # so the remaining checks still run.
        print(f"Search request failed: {e}")


def _list_documents(base_url, headers, timeout=60):
    """GET ``/documents`` and print each document's name and status."""
    print("\n=== Testing Document List ===")
    try:
        docs_response = requests.get(
            f"{base_url}/documents", headers=headers, timeout=timeout
        )
        if docs_response.status_code != 200:
            print(f"Documents list failed: {docs_response.status_code}")
            return

        documents = docs_response.json().get('documents', [])
        print(f"Found {len(documents)} documents")
        for doc in documents:
            print(f"  - {doc.get('name', '')} (Status: {doc.get('status', '')})")
    except Exception as e:
        # Same best-effort policy as the search step: report and carry on.
        print(f"Documents request failed: {e}")


def test_ocr_upload_and_search():
    """Smoke-test search and document listing against a local server.

    Steps, all against ``http://localhost:3015``:

    1. Log in via ``login_and_get_token``; stop if no auth header is returned.
    2. Check ``/health`` and print the LLM/embedding configuration; stop if
       the server is not healthy.
    3. Run a fixed search query and print the returned results.
    4. List the documents known to the server.

    Despite the name, this function does not upload anything itself; it only
    searches and lists documents. Outcomes are printed rather than asserted,
    and the function always returns ``None``.
    """
    base_url = "http://localhost:3015"

    # Get authentication token
    headers = login_and_get_token(base_url)
    if not headers:
        return

    # Later steps are pointless if the server is not reachable/healthy.
    if not _check_server_health(base_url, headers):
        return

    _run_search(base_url, headers)
    # Test document list to verify OCR PDF was processed
    _list_documents(base_url, headers)
if __name__ == "__main__":
    # Executed as a script: run the workflow once and let it print its report.
    test_ocr_upload_and_search()