Files
railseek6/get_search_results.py

77 lines
2.4 KiB
Python

import requests
import json
import base64
def test_search_with_auth(
    base_url="http://localhost:3015",
    username="jleu3482",
    password="jleu1212",
    queries=None,
    top_k=5,
    timeout=30,
):
    """Run sample queries against the authenticated search endpoint and print the hits.

    For every query a JSON body ``{"query": ..., "top_k": ...}`` is POSTed to
    ``{base_url}/search`` with an HTTP Basic ``Authorization`` header. Up to
    200 characters of each hit's ``content`` are printed together with its
    ``score`` and ``source``. A failure on one query is reported and the
    remaining queries are still attempted.

    Args:
        base_url: Root URL of the service; a trailing slash is tolerated.
        username: User name for HTTP Basic authentication.
        password: Password for HTTP Basic authentication.
        queries: Iterable of query strings. ``None`` selects the built-in
            OCR-related sample queries.
        top_k: Value sent as ``top_k`` in each request body.
        timeout: Seconds to wait for the server before giving up on a
            request, so an unresponsive server cannot hang the script.

    Returns:
        None. All output goes to stdout.
    """
    # NOTE(review): the default credentials are hard-coded in source; pass
    # them in explicitly (or load them from configuration) outside of local
    # testing.
    if queries is None:
        queries = (
            "OCR",
            "text extraction",
            "document processing",
            "optical character recognition",
            "PDF conversion",
        )

    # HTTP Basic auth token: base64 of "username:password".
    token = base64.b64encode(f"{username}:{password}".encode()).decode()
    headers = {
        "Authorization": f"Basic {token}",
        "Content-Type": "application/json",
    }
    search_url = f"{base_url.rstrip('/')}/search"

    print("🔍 SEARCH RESULTS FOR OCR PDF CONTENT")
    print("=" * 70)

    for query in queries:
        print(f"\n📝 Query: '{query}'")
        print("-" * 40)
        try:
            response = requests.post(
                search_url,
                json={"query": query, "top_k": top_k},
                headers=headers,
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"❌ Search failed: {response.status_code} - {response.text}")
                continue

            # A JSON null for "results" is treated the same as a missing key.
            hits = response.json().get('results') or []
            print(f"✅ Search successful - {len(hits)} results found")

            # Display results
            for position, hit in enumerate(hits, start=1):
                # Null or non-string content must not abort the listing.
                content = str(hit.get('content') or '')
                score = hit.get('score', 0)
                if isinstance(score, (int, float)):
                    score_text = f"{score:.4f}"
                else:
                    score_text = str(score)
                print(f"\n Result {position}:")
                print(f" Content: {content[:200]}...")
                print(f" Score: {score_text}")
                print(f" Source: {hit.get('source', 'Unknown')}")
        except (requests.RequestException, ValueError, TypeError, AttributeError) as e:
            # Transport failure, undecodable JSON, or a payload whose shape is
            # not the expected dict-of-hits: report it and move to the next query.
            print(f"❌ Search error: {e}")

    print("\n" + "=" * 70)
    print("📊 SEARCH SUMMARY")
    print("=" * 70)
    print("If results are found, the OCR PDF has been successfully:")
    print("✅ Uploaded to the system")
    print("✅ Processed with OCR text extraction")
    print("✅ Indexed across all databases")
    print("✅ Made searchable via vector similarity")
    print("\nIf no results are found, the document may not be indexed yet.")
    print("Please upload the OCR PDF through the Web UI first.")
# Script entry point: run the search check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_search_with_auth()