117 lines
4.7 KiB
Python
117 lines
4.7 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
import os
|
|
|
|
def test_ocr_upload_with_corrected_dimensions():
    """Run an end-to-end login, OCR PDF upload, and search check.

    Originally written to exercise OCR PDF upload after the Qdrant
    dimension correction (1024) and the form-based login fix.

    Steps, each of which must succeed for the next one to run:

    1. POST form-encoded credentials to ``/login`` and read the
       ``access_token`` from the JSON response.
    2. POST ``ocr.pdf`` (from the current working directory) to
       ``/documents/upload`` as a multipart file.
    3. Sleep 15 seconds, then GET ``/documents`` and print the result.
    4. POST a fixed query to ``/search`` and check that the response
       contains a non-empty ``"chunks"`` entry.

    Progress and errors are reported with ``print``; exceptions are not
    propagated to the caller.

    Returns:
        bool: True only when the search response contains at least one
        chunk; False on any failed step or raised exception.
    """
    base_url = "http://localhost:3015"

    # requests applies no timeout by default, so an unresponsive server would
    # hang the script forever. (connect, read) in seconds; the read timeout is
    # generous because the server may take a while to handle the upload.
    request_timeout = (10, 300)

    # Test login with form data (not JSON)
    print("Testing login with form data...")
    login_data = {
        "username": "jleu3482",
        "password": "jleu1212",
    }

    try:
        # Use form data for login (OAuth2PasswordRequestForm expects form data, not JSON)
        login_response = requests.post(
            f"{base_url}/login",
            data=login_data,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            timeout=request_timeout,
        )
        print(f"Login status: {login_response.status_code}")

        if login_response.status_code != 200:
            print(f"❌ Login failed with status {login_response.status_code}")
            print(f"Login error: {login_response.text}")
            return False

        print("✅ Login successful")
        token = login_response.json().get("access_token")
        if not token:
            # Without this guard, slicing a missing token below would raise a
            # TypeError that gets reported as an opaque login error.
            print("❌ Login response did not contain an access token")
            return False
        print(f"Token received: {token[:50]}...")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fail the run.
        print(f"❌ Login error: {e}")
        return False

    headers = {"Authorization": f"Bearer {token}"}

    # Test document upload
    print("\nTesting OCR PDF upload...")
    try:
        # The context manager closes the PDF as soon as the upload finishes,
        # and a missing file is reported as an upload problem (not a login one).
        with open("ocr.pdf", "rb") as pdf_file:
            upload_response = requests.post(
                f"{base_url}/documents/upload",
                files={"file": ("ocr.pdf", pdf_file, "application/pdf")},
                headers=headers,
                timeout=request_timeout,
            )

        if upload_response.status_code != 200:
            print(f"❌ Upload failed: {upload_response.status_code}")
            print(f"Upload error: {upload_response.text}")
            return False

        print("✅ Upload successful")
        upload_result = upload_response.json()
        print(f"Upload response: {json.dumps(upload_result, indent=2)}")

        # Wait for processing
        print("\n⏳ Waiting for document processing...")
        time.sleep(15)

        # Check document status
        status_response = requests.get(
            f"{base_url}/documents",
            headers=headers,
            timeout=request_timeout,
        )
        if status_response.status_code != 200:
            # Previously this path returned False without saying why.
            print(f"❌ Document status check failed: {status_response.status_code}")
            print(f"Status error: {status_response.text}")
            return False

        documents = status_response.json()
        print(f"📄 Document status: {json.dumps(documents, indent=2)}")

        # Test search functionality
        print("\n🔍 Testing search functionality...")
        search_data = {
            "query": "minimum safe distance",
            "top_k": 5,
        }
        search_response = requests.post(
            f"{base_url}/search",
            json=search_data,
            headers=headers,
            timeout=request_timeout,
        )

        if search_response.status_code != 200:
            print(f"❌ Search failed: {search_response.status_code}")
            print(f"Search error: {search_response.text}")
            return False

        search_results = search_response.json()
        print("✅ Search successful!")
        print(f"Search results: {json.dumps(search_results, indent=2)}")

        # Verify OCR content was properly indexed
        if "chunks" in search_results and len(search_results["chunks"]) > 0:
            print("✅ OCR content successfully indexed and searchable!")
            return True

        print("❌ No search results found")
        return False
    except Exception as e:
        # Covers the upload, status and search steps (network errors, missing
        # PDF, non-JSON responses); message kept from the original script.
        print(f"❌ Upload error: {e}")
        return False
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the end-to-end check and print a summary.
    success = test_ocr_upload_with_corrected_dimensions()
    if success:
        print("\n🎉 OCR Upload and Search Test COMPLETED SUCCESSFULLY!")
    else:
        print("\n💥 OCR Upload and Search Test FAILED!")
    # Exit non-zero on failure so shells and CI jobs can detect it; the
    # printed summary alone left the process exit status at 0 either way.
    raise SystemExit(0 if success else 1)