102 lines
3.7 KiB
Python
102 lines
3.7 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
|
|
def login_and_get_token(base_url, username="jleu3482", password="jleu1212", timeout=30):
    """Log in to the server and build the Authorization header for later calls.

    Posts form-encoded credentials to ``{base_url}/login`` and reads the
    ``access_token`` field from the JSON response body.

    Args:
        base_url: Root URL of the server (``/login`` is appended to it).
        username: Account name to log in with. The default is the account
            this script was originally hard-coded to use.
        password: Password for ``username``.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot hang the script forever.

    Returns:
        ``{"Authorization": "Bearer <token>"}`` on success, or ``None`` when
        the request cannot be sent, the status code is not 200, or the
        response carries no usable ``access_token``.
    """
    # NOTE(review): credentials still default to literals in source control;
    # consider supplying them from the environment instead.
    print("Logging in...")
    try:
        login_response = requests.post(
            f"{base_url}/login",
            data={"username": username, "password": password},
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection refused, DNS failure, timeout, ... -> same failure
        # contract as a bad status code: report and return None.
        print(f"❌ Login failed: {e}")
        return None

    if login_response.status_code != 200:
        print(f"❌ Login failed: {login_response.status_code}")
        print(f"Response: {login_response.text}")
        return None

    try:
        token = login_response.json().get("access_token")
    except (ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        token = None

    if not token:
        # Without this guard the caller would receive "Bearer None" and every
        # later request would fail with a confusing authorization error.
        print("❌ Login failed: no access_token in response")
        print(f"Response: {login_response.text}")
        return None

    print("✅ Login successful")
    return {"Authorization": f"Bearer {token}"}
|
|
|
|
# Seconds to wait on each HTTP call so an unresponsive server cannot hang the run.
_REQUEST_TIMEOUT = 60


def _check_server_health(base_url, headers):
    """Query ``/health`` and print the reported LLM/embedding configuration.

    Returns:
        True when the server answered 200 and the expected configuration
        fields could be read; False otherwise (the reason is printed).
    """
    print("\nTesting server health...")
    try:
        health_response = requests.get(
            f"{base_url}/health", headers=headers, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as e:
        print(f"❌ Server health check failed: {e}")
        return False

    if health_response.status_code != 200:
        print(f"❌ Server health check failed: {health_response.status_code}")
        return False

    print("✅ Server is healthy")
    try:
        configuration = health_response.json()["configuration"]
        llm_binding = configuration["llm_binding"]
        llm_host = configuration["llm_binding_host"]
        embedding_model = configuration["embedding_model"]
    except (ValueError, KeyError, TypeError) as e:
        # Non-JSON body, missing key, or a payload that is not a mapping.
        print(f"❌ Unexpected health response format: {e!r}")
        return False

    print(f"LLM Binding: {llm_binding}")
    print(f"LLM Host: {llm_host}")
    print(f"Embedding Model: {embedding_model}")

    # Check if DeepSeek configuration is correct. str() keeps the membership
    # test safe when the host is reported as null.
    if "deepseek.com" in str(llm_host):
        print("✅ DeepSeek API configuration is correct")
    else:
        print("❌ DeepSeek API configuration is incorrect")
    return True


def _run_ocr_search(base_url, headers):
    """POST a fixed OCR-related query to ``/search`` and print the hits."""
    print("\n=== Testing Search with OCR Content ===")
    search_data = {
        "query": "optical character recognition",
        "top_k": 3,
    }

    # Best-effort diagnostic step: any failure is reported and the workflow
    # moves on to the next check instead of aborting.
    try:
        search_response = requests.post(
            f"{base_url}/search",
            json=search_data,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        print(f"Search response status: {search_response.status_code}")

        if search_response.status_code != 200:
            print(f"Search error: {search_response.text}")
            return

        search_results = search_response.json()
        print("✅ Search successful!")
        # "or []" also covers a response where "results" is explicitly null.
        results = search_results.get('results') or []
        print(f"Found {len(results)} results")

        # Display results
        for i, result in enumerate(results, start=1):
            print(f"\nResult {i}:")
            print(f" Score: {result.get('score', 0):.4f}")
            print(f" Content: {result.get('content', '')[:200]}...")
            print(f" Source: {result.get('source', '')}")
    except Exception as e:
        print(f"Search request failed: {e}")


def _list_documents(base_url, headers):
    """GET ``/documents`` and print each document's name and status."""
    print("\n=== Testing Document List ===")

    # Best-effort diagnostic step, same policy as the search step.
    try:
        docs_response = requests.get(
            f"{base_url}/documents", headers=headers, timeout=_REQUEST_TIMEOUT
        )
        if docs_response.status_code != 200:
            print(f"Documents list failed: {docs_response.status_code}")
            return

        # "or []" also covers a response where "documents" is explicitly null.
        documents = docs_response.json().get('documents') or []
        print(f"Found {len(documents)} documents")
        for doc in documents:
            print(f" - {doc.get('name', '')} (Status: {doc.get('status', '')})")
    except Exception as e:
        print(f"Documents request failed: {e}")


def test_ocr_upload_and_search():
    """Run the OCR search smoke test against the local server.

    Steps, in order: log in, check ``/health`` (including whether the LLM
    host points at deepseek.com), run a fixed search query, and list the
    known documents. Results are printed; nothing is returned.

    The run stops early if login or the health check fails. Despite the
    name, no file is uploaded here: the search and document-list steps only
    inspect whatever the server already holds.
    """
    base_url = "http://localhost:3015"

    # Get authentication token
    headers = login_and_get_token(base_url)
    if not headers:
        return

    # Test server health
    if not _check_server_health(base_url, headers):
        return

    # Test search with OCR content
    _run_ocr_search(base_url, headers)

    # Test document list to verify OCR PDF was processed
    _list_documents(base_url, headers)
|
|
|
|
# Script entry point: run the workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_ocr_upload_and_search()