Files
railseek6/test_ocr_fix_with_auth.py

103 lines
4.6 KiB
Python

import requests
import time
import os
def test_ocr_upload_with_auth():
    """Exercise the authenticated OCR upload flow against a local server.

    Steps, each reported on stdout:

    1. POST the credentials to ``/auth/login`` and read ``access_token``
       from the JSON reply.
    2. POST ``ocr.pdf`` (from the current working directory) to
       ``/documents/upload`` using a bearer token.
    3. Sleep 15 seconds, then GET ``/documents`` and
       ``/documents/track_status/<track_id>`` and print what they report.
    4. POST a sample query to ``/query`` and print the start of each result.

    The function makes no assertions and returns ``None``; every exception
    is caught and printed, so the outcome must be read from the output.
    """
    # Server URL (assuming it's running on port 3015)
    base_url = "http://localhost:3015"
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment instead.
    auth_data = {
        "username": "jleu3482",
        "password": "jleu1212",
    }
    # (connect, read) timeouts in seconds. Without a timeout, requests waits
    # indefinitely when the server accepts the connection but never answers.
    request_timeout = (10, 120)

    # --- Step 1: login to obtain a bearer token --------------------------
    try:
        print("Logging in...")
        login_response = requests.post(
            f"{base_url}/auth/login",
            data=auth_data,
            timeout=request_timeout,
        )
        if login_response.status_code != 200:
            print(f"Login failed: {login_response.status_code}")
            print(f"Response: {login_response.text}")
            return
        access_token = login_response.json()['access_token']
        print(f"Login successful! Token: {access_token[:20]}...")
    except Exception as e:
        print(f"Error during login: {e}")
        return

    # Set up headers with authentication
    headers = {"Authorization": f"Bearer {access_token}"}

    # --- Steps 2-4: upload, status checks, search ------------------------
    try:
        print("Uploading ocr.pdf...")
        # The context manager closes the file as soon as the upload request
        # finishes, and a missing file is reported as an upload error rather
        # than being mistaken for a login error.
        with open('ocr.pdf', 'rb') as pdf_file:
            files = {'file': ('ocr.pdf', pdf_file, 'application/pdf')}
            response = requests.post(
                f"{base_url}/documents/upload",
                files=files,
                headers=headers,
                timeout=request_timeout,
            )

        if response.status_code != 200:
            print(f"Upload failed: {response.status_code}")
            print(f"Response: {response.text}")
            return

        upload_result = response.json()
        track_id = upload_result['track_id']
        print("Upload successful!")
        print(f"Status: {upload_result['status']}")
        print(f"Message: {upload_result['message']}")
        print(f"Track ID: {track_id}")

        # Wait a bit for processing
        print("Waiting for processing...")
        time.sleep(15)

        # Check document status
        print("Checking document status...")
        status_response = requests.get(
            f"{base_url}/documents",
            headers=headers,
            timeout=request_timeout,
        )
        if status_response.status_code == 200:
            status_data = status_response.json()
            print("Document statuses:")
            for status_type, docs in status_data['statuses'].items():
                print(f" {status_type}: {len(docs)} documents")
                for doc in docs:
                    # NOTE(review): "entities" is printed from 'chunks_count',
                    # which looks like a copy-paste slip; the correct key is
                    # not visible here, so the original output is preserved.
                    print(f" - {doc['file_path']}: {doc['content_summary']} (chunks: {doc.get('chunks_count', 0)}, entities: {doc.get('chunks_count', 0)})")

        # Check track status
        print(f"Checking track status for {track_id}...")
        track_response = requests.get(
            f"{base_url}/documents/track_status/{track_id}",
            headers=headers,
            timeout=request_timeout,
        )
        if track_response.status_code == 200:
            track_data = track_response.json()
            print(f"Total documents in track: {track_data['total_count']}")
            for doc in track_data['documents']:
                print(f" - {doc['file_path']}: {doc['status']} (chunks: {doc.get('chunks_count', 0)})")

        # Test search functionality
        print("Testing search functionality...")
        search_data = {
            "query": "safety precautions minimum distance",
            "top_k": 5,
        }
        search_response = requests.post(
            f"{base_url}/query",
            json=search_data,
            headers=headers,
            timeout=request_timeout,
        )
        if search_response.status_code == 200:
            hits = search_response.json().get('results', [])
            print(f"Search successful! Found {len(hits)} results")
            # A distinct loop name avoids shadowing the upload result.
            for position, hit in enumerate(hits, start=1):
                print(f" Result {position}: {hit.get('content', '')[:100]}...")
        else:
            print(f"Search failed: {search_response.status_code}")
    except Exception as e:
        print(f"Error during upload: {e}")
if __name__ == "__main__":
    # Run the upload check only when this file is executed as a script.
    test_ocr_upload_with_auth()