# Listing metadata carried over from the code viewer: 106 lines, 4.2 KiB, Python.
import json
import os
import sys
import time

import requests

# Test uploading the ocr.pdf file with proper authentication.
#
# Flow: log in, clear existing documents, upload the PDF, wait, print the
# document's status entry, then run one search query. The process exit code
# is 0 only when the upload, the status request and the search all succeed.
base_url = 'http://localhost:3015'
pdf_file = 'ocr.pdf'
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store before sharing this script.
username = 'jleu3482'
password = 'jleu1212'

# (connect, read) timeouts in seconds passed to every HTTP call. Without a
# timeout a stalled server would block the script forever; the read timeout
# is generous so slow uploads or queries still have time to finish.
REQUEST_TIMEOUT = (10, 300)


def find_document(documents: dict, file_path: str) -> "dict | None":
    """Return the first status entry whose ``file_path`` equals *file_path*.

    *documents* is the decoded body of ``GET /documents``. Entries are looked
    up under its ``'statuses'`` key, which maps a status name to a list of
    document dicts. Returns ``None`` when the key is absent, the payload has
    an unexpected shape, or no entry matches.
    """
    if not isinstance(documents, dict):
        return None
    statuses = documents.get('statuses')
    if not isinstance(statuses, dict):
        return None
    for doc_list in statuses.values():
        for doc in doc_list or ():
            if isinstance(doc, dict) and doc.get('file_path') == file_path:
                return doc
    return None


def login() -> "dict | None":
    """Log in (OAuth2 password flow) and return the Authorization headers.

    Returns ``None`` after printing the reason when the server rejects the
    login or the response carries no ``access_token``.
    """
    print("🔐 Logging in...")
    login_data = {
        "username": username,
        "password": password,
        "grant_type": "password",
        "scope": "",
        "client_id": None,
        "client_secret": None,
    }
    login_response = requests.post(
        f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
    )
    if login_response.status_code != 200:
        print(f'❌ Login failed: {login_response.text}')
        return None

    access_token = login_response.json().get('access_token')
    if not access_token:
        # Guard against sending the literal header "Bearer None" downstream.
        print('❌ Login failed: response did not contain an access_token')
        return None

    print('✅ Login successful')
    return {'Authorization': f'Bearer {access_token}'}


def clear_documents(headers: dict) -> None:
    """Delete existing documents to avoid a "duplicated" error on upload.

    Best effort: a non-200 answer is reported as a warning and the run
    continues.
    """
    print("🗑️ Clearing existing documents...")
    clear_response = requests.delete(
        f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
    )
    if clear_response.status_code == 200:
        print("✅ Documents cleared")
    else:
        print(f"⚠️ Could not clear documents: {clear_response.text}")


def upload_document(headers: dict) -> bool:
    """Upload ``pdf_file`` and return True when the server reports success."""
    print(f"📤 Uploading {pdf_file}...")
    with open(pdf_file, 'rb') as file:
        files = {'file': (pdf_file, file, 'application/pdf')}
        upload_response = requests.post(
            f'{base_url}/documents/upload',
            files=files,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )

    print(f" Upload Status: {upload_response.status_code}")
    if upload_response.status_code != 200:
        print(f'❌ Upload failed with status {upload_response.status_code}: {upload_response.text}')
        return False

    result = upload_response.json()
    print(f" Response: {json.dumps(result, indent=2)}")
    if result.get('status') != 'success':
        print(f'❌ Upload failed: {result.get("message", "Unknown error")}')
        return False

    print('✅ Upload successful')
    return True


def report_document_status(headers: dict) -> bool:
    """Fetch the document list and print the entry for ``pdf_file``.

    Returns False only when the status request itself fails; a missing entry
    is reported as a warning.
    """
    print("\n🔍 Checking document status...")
    status_response = requests.get(
        f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
    )
    if status_response.status_code != 200:
        print(f"❌ Failed to get documents: {status_response.text}")
        return False

    documents = status_response.json()
    print(f" Documents: {json.dumps(documents, indent=2)}")

    # Look for our document in the response
    doc = find_document(documents, pdf_file)
    if doc is None:
        print(f"⚠️ {pdf_file} not found in document list")
        return True

    print("\n📊 Document Status:")
    print(f" File: {doc.get('file_path')}")
    print(f" Status: {doc.get('status')}")
    print(f" Error: {doc.get('error_msg', 'None')}")
    print(f" Content Length: {doc.get('content_length', 'Unknown')}")
    return True


def run_search(headers: dict) -> bool:
    """Run one query with content from ocr.pdf; return True on HTTP 200."""
    print("\n🔍 Testing search functionality...")
    query_data = {'query': 'safety precautions minimum safe distance high voltage', 'top_k': 5}
    search_response = requests.post(
        f'{base_url}/query', json=query_data, headers=headers, timeout=REQUEST_TIMEOUT
    )
    if search_response.status_code != 200:
        print(f'❌ Search failed: {search_response.text}')
        return False

    results = search_response.json()
    print("✅ Search successful")
    print(f" Response: {results.get('response', 'No response field')}")
    return True


def main() -> int:
    """Run the whole upload check and return a process exit code."""
    # Check if ocr.pdf exists
    if not os.path.isfile(pdf_file):
        print(f"❌ {pdf_file} not found in current directory")
        return 1

    print(f"📄 Found {pdf_file}, size: {os.path.getsize(pdf_file)} bytes")

    try:
        headers = login()
        if headers is None:
            return 1

        clear_documents(headers)
        # Wait a moment for cleanup
        time.sleep(2)

        if not upload_document(headers):
            return 1

        # Wait for processing to complete
        print("\n⏳ Waiting for document processing...")
        time.sleep(10)

        status_ok = report_document_status(headers)
        search_ok = run_search(headers)
    except (requests.RequestException, ValueError) as exc:
        # Network failures, timeouts and non-JSON bodies end the run with a
        # readable message instead of a traceback.
        print(f"❌ Request failed: {exc}")
        return 1

    return 0 if status_ok and search_ok else 1


if __name__ == "__main__":
    sys.exit(main())