136 lines
3.8 KiB
Python
136 lines
3.8 KiB
Python
import requests
|
|
import json
|
|
import base64
|
|
|
|
def get_jwt_token(
    base_url="http://localhost:3015",
    username="jleu3482",
    password="jleu1212",
    timeout=30,
):
    """Log in to the service and return the JWT access token.

    The credentials are posted as form data to ``{base_url}/login`` and the
    ``access_token`` field is read from the JSON body of a 200 response.
    Progress and failures are reported on stdout.

    Args:
        base_url: Root URL of the service (no trailing slash).
        username: Login name sent in the form body.
        password: Password sent in the form body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value of ``access_token`` from the response, or ``None`` when the
        server answers with a non-200 status, the response carries no
        ``access_token``, or any error occurs while making the request or
        decoding the body.
    """
    login_data = {
        "username": username,
        "password": password,
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive server and the whole script hangs.
        response = requests.post(
            f"{base_url}/login",
            data=login_data,
            timeout=timeout,
        )

        if response.status_code == 200:
            token_data = response.json()
            print("✅ Login successful")
            print(f"Token: {token_data.get('access_token', 'No token')}")
            return token_data.get('access_token')

        print(f"❌ Login failed: {response.status_code} - {response.text}")
        return None

    except Exception as e:
        # Deliberately broad: callers treat ``None`` as "login failed" for any
        # reason (connection error, timeout, malformed JSON), and the cause is
        # reported here rather than propagated.
        print(f"❌ Login error: {e}")
        return None
|
|
|
|
def search_with_jwt(
    token,
    query,
    top_k=5,
    base_url="http://localhost:3015",
    timeout=30,
):
    """Run one search request authenticated with a JWT bearer token.

    Posts ``{"query": query, "top_k": top_k}`` as JSON to
    ``{base_url}/search`` with an ``Authorization: Bearer <token>`` header.
    Failures are reported on stdout.

    Args:
        token: JWT access token placed in the ``Authorization`` header.
        query: Search text sent as the ``query`` field.
        top_k: Value sent as the ``top_k`` field of the payload.
        base_url: Root URL of the service (no trailing slash).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of a 200 response, or ``None`` when the server
        answers with any other status or an error occurs while making the
        request or decoding the body.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    payload = {
        "query": query,
        "top_k": top_k,
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive server and the whole script hangs.
        response = requests.post(
            f"{base_url}/search",
            json=payload,
            headers=headers,
            timeout=timeout,
        )

        if response.status_code == 200:
            return response.json()

        print(f"❌ Search failed: {response.status_code} - {response.text}")
        return None

    except Exception as e:
        # Deliberately broad: callers treat ``None`` as "search failed" for
        # any reason, and the cause is reported here rather than propagated.
        print(f"❌ Search error: {e}")
        return None
|
|
|
|
def main():
    """Authenticate, run a fixed list of sample searches, and print a report.

    Obtains a token via ``get_jwt_token()``; if none is returned the function
    prints a message and stops. Otherwise each sample query is sent through
    ``search_with_jwt()``, every returned hit is printed (content preview,
    score, source), and a summary states how many queries produced at least
    one hit, followed by either a success checklist or upload instructions.
    """
    rule = "=" * 70

    print("🔐 GETTING JWT TOKEN AND TESTING SEARCH")
    print(rule)

    # Step 1: authenticate; nothing below can run without a token.
    jwt = get_jwt_token()
    if not jwt:
        print("❌ Cannot proceed without JWT token")
        return

    print("\n🔍 TESTING SEARCH WITH JWT TOKEN")
    print(rule)

    # Step 2: run each sample query and remember its raw response.
    sample_queries = [
        "OCR",
        "text extraction",
        "document processing",
        "optical character recognition",
        "PDF conversion",
    ]
    responses = {}

    for text in sample_queries:
        print(f"\n📝 Query: '{text}'")
        print("-" * 40)

        response = search_with_jwt(jwt, text)

        # A falsy response (None or an empty body) is recorded as a miss.
        if not response:
            responses[text] = None
            print("❌ No results returned")
            continue

        responses[text] = response
        hits = response.get('results', [])
        print(f"✅ Search successful - {len(hits)} results found")

        for position, hit in enumerate(hits, start=1):
            print(f"\n Result {position}:")
            print(f" Content: {hit.get('content', '')[:200]}...")
            print(f" Score: {hit.get('score', 0):.4f}")
            print(f" Source: {hit.get('source', 'Unknown')}")

    # Step 3: summary. A query counts as successful only when its response
    # contains a non-empty 'results' entry.
    print("\n" + rule)
    print("📊 SEARCH RESULTS SUMMARY")
    print(rule)

    hit_queries = 0
    for response in responses.values():
        if response and response.get('results'):
            hit_queries += 1
    query_total = len(sample_queries)

    print(f"✅ Successful searches: {hit_queries}/{query_total}")

    if hit_queries > 0:
        closing_lines = (
            "\n🎉 OCR PDF WORKFLOW VALIDATION:",
            "✅ Document uploaded successfully",
            "✅ OCR text extraction completed",
            "✅ Indexing across databases successful",
            "✅ Search functionality working",
        )
    else:
        closing_lines = (
            "\n⚠️ OCR PDF may not be indexed yet",
            "Please upload the document through the Web UI first",
            "Web UI: http://localhost:3015/webui/",
            "Username: jleu3482",
            "Password: jleu1212",
        )

    for line in closing_lines:
        print(line)
|
|
|
|
# Run the login-and-search check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()