Files
railseek6/test_ocr_upload_corrected.py

117 lines
4.7 KiB
Python

import requests
import json
import time
import os
def test_ocr_upload_with_corrected_dimensions():
    """Run an end-to-end login -> upload -> search check against the local service.

    The original docstring describes this as a test of OCR PDF upload "with
    corrected Qdrant dimensions (1024) and proper login"; nothing in this
    function inspects the vector dimension itself, it only exercises the
    HTTP API.

    Steps:
        1. POST form-encoded credentials to ``/login`` and read
           ``access_token`` from the JSON reply.
        2. Upload ``ocr.pdf`` (resolved against the current working
           directory) to ``/documents/upload`` with a bearer token.
        3. Sleep 15 seconds, then list ``/documents`` and print the result.
           The listing is informational only; a non-200 reply does not
           abort the run.
        4. POST a query to ``/search`` and require a non-empty ``"chunks"``
           entry in the JSON reply.

    Returns:
        bool: ``True`` only when the search reply contains at least one
        chunk. ``False`` for any non-200 login/upload/search reply, a
        missing token, an empty result, or any exception raised while
        talking to the service or reading the PDF (the error is printed,
        not re-raised).
    """
    base_url = "http://localhost:3015"
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server. The values are deliberately generous.
    request_timeout = 60
    upload_timeout = 300

    # Test login with form data (not JSON)
    print("Testing login with form data...")
    # NOTE(review): credentials are hard-coded in the script; consider moving
    # them to environment variables or a config file.
    login_data = {
        "username": "jleu3482",
        "password": "jleu1212",
    }

    try:
        # Use form data for login (per the original author,
        # OAuth2PasswordRequestForm expects form data, not JSON)
        login_response = requests.post(
            f"{base_url}/login",
            data=login_data,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            timeout=request_timeout,
        )
        print(f"Login status: {login_response.status_code}")

        if login_response.status_code != 200:
            print(f"❌ Login failed with status {login_response.status_code}")
            print(f"Login error: {login_response.text}")
            return False

        print("✅ Login successful")
        token = login_response.json().get("access_token")
        # A 200 reply without a usable token would otherwise fail on the
        # slice below with an unhelpful TypeError.
        if not isinstance(token, str) or not token:
            print("❌ Login response did not contain an access token")
            return False
        print(f"Token received: {token[:50]}...")
    except Exception as e:
        # Boundary handler: this is a script-level check, so any failure in
        # the login phase is reported and turned into a False result.
        print(f"❌ Login error: {e}")
        return False

    headers = {"Authorization": f"Bearer {token}"}

    # Test document upload
    print("\nTesting OCR PDF upload...")
    try:
        # Open the PDF inside the try so a missing file is reported as an
        # upload problem, and use a context manager so the handle is closed
        # as soon as the request has been sent.
        with open("ocr.pdf", "rb") as pdf_file:
            upload_response = requests.post(
                f"{base_url}/documents/upload",
                files={"file": ("ocr.pdf", pdf_file, "application/pdf")},
                headers=headers,
                timeout=upload_timeout,
            )

        if upload_response.status_code != 200:
            print(f"❌ Upload failed: {upload_response.status_code}")
            print(f"Upload error: {upload_response.text}")
            return False

        print("✅ Upload successful")
        upload_result = upload_response.json()
        print(f"Upload response: {json.dumps(upload_result, indent=2)}")

        # Wait for processing
        print("\n⏳ Waiting for document processing...")
        time.sleep(15)

        # Check document status (printed for diagnostics only)
        status_response = requests.get(
            f"{base_url}/documents",
            headers=headers,
            timeout=request_timeout,
        )
        if status_response.status_code == 200:
            documents = status_response.json()
            print(f"📄 Document status: {json.dumps(documents, indent=2)}")

        # Test search functionality
        print("\n🔍 Testing search functionality...")
        search_data = {
            "query": "minimum safe distance",
            "top_k": 5,
        }
        search_response = requests.post(
            f"{base_url}/search",
            json=search_data,
            headers=headers,
            timeout=request_timeout,
        )
        if search_response.status_code != 200:
            print(f"❌ Search failed: {search_response.status_code}")
            print(f"Search error: {search_response.text}")
            return False

        search_results = search_response.json()
        print("✅ Search successful!")
        print(f"Search results: {json.dumps(search_results, indent=2)}")

        # Verify OCR content was properly indexed
        if "chunks" in search_results and len(search_results["chunks"]) > 0:
            print("✅ OCR content successfully indexed and searchable!")
            return True

        print("❌ No search results found")
        return False
    except Exception as e:
        # Covers the whole upload -> status -> search phase, matching the
        # message the original script printed for any failure in it.
        print(f"❌ Upload error: {e}")
        return False
if __name__ == "__main__":
    # Run the end-to-end check once and print a human-readable verdict.
    success = test_ocr_upload_with_corrected_dimensions()
    if success:
        print("\n🎉 OCR Upload and Search Test COMPLETED SUCCESSFULLY!")
    else:
        print("\n💥 OCR Upload and Search Test FAILED!")
    # Mirror the verdict in the process exit status so shells and CI jobs
    # can detect a failed run (previously the script always exited with 0).
    raise SystemExit(0 if success else 1)