# Source file: railseek6/test_ocr_with_auth_fixed.py
# Repository listing metadata: 166 lines, 5.7 KiB, Python

import requests
import json
import base64
import time
import logging
from pathlib import Path
# Configure the root logger at import time: INFO and above, with each record
# rendered as "[YYYY-MM-DD HH:MM:SS] [LEVEL] message".
_LOG_SETTINGS = {
    "level": logging.INFO,
    "format": '[%(asctime)s] [%(levelname)s] %(message)s',
    "datefmt": '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOG_SETTINGS)
class LightRAGOCRTest:
    """Smoke-test client for an HTTP server protected by Basic authentication.

    Every request carries a pre-computed ``Authorization: Basic ...`` header
    and is bounded by ``timeout`` so an unresponsive server cannot hang the
    script. Each check logs its outcome; none of them raises on failure.
    """

    # NOTE(review): the default credentials are hard-coded in the signature and
    # kept only for backward compatibility; callers should pass their own.
    def __init__(self, base_url="http://localhost:3015", username="jleu3482",
                 password="jleu1212", timeout=120):
        """Store connection settings and build the auth header.

        Args:
            base_url: Scheme, host and port of the server, without a trailing
                slash (endpoint paths are appended directly).
            username: Basic-auth user name.
            password: Basic-auth password.
            timeout: Seconds passed to every ``requests`` call as ``timeout``.
                Pass ``None`` to wait indefinitely (the previous behaviour).
        """
        self.base_url = base_url
        self.username = username
        self.password = password
        self.timeout = timeout
        self.auth_header = self._create_auth_header()

    def _create_auth_header(self):
        """Return the HTTP Basic ``Authorization`` header as a dict."""
        credentials = f"{self.username}:{self.password}"
        encoded_credentials = base64.b64encode(credentials.encode()).decode()
        return {"Authorization": f"Basic {encoded_credentials}"}

    def test_server_health(self):
        """GET a fixed set of endpoints and log each one's HTTP status.

        An endpoint is marked with a check mark when it answers 200 and with
        a cross otherwise. A failure on one endpoint is logged and does not
        stop the remaining endpoints from being probed. Returns ``None``.
        """
        logging.info("Testing server health endpoints...")
        endpoints = {
            "Root": "/",
            "Health": "/health",
            "WebUI": "/webui/",
        }
        for name, endpoint in endpoints.items():
            try:
                response = requests.get(
                    f"{self.base_url}{endpoint}",
                    headers=self.auth_header,
                    timeout=self.timeout,
                )
                # Same pass/fail markers as the other log messages in this class.
                status = "✓" if response.status_code == 200 else "✗"
                logging.info(f"{status} {name} ({endpoint}): {response.status_code}")
            except Exception as e:
                logging.error(f"{name} ({endpoint}): {e}")

    def upload_ocr_pdf(self, file_path="ocr.pdf"):
        """POST a PDF to ``/documents/upload`` as multipart form data.

        Args:
            file_path: Path of the PDF to upload.

        Returns:
            ``True`` if the server answered 200; ``False`` if the file does
            not exist, the server answered anything else, or an error
            occurred while uploading.
        """
        logging.info(f"Uploading OCR PDF: {file_path}")
        pdf_path = Path(file_path)
        if not pdf_path.exists():
            logging.error(f"✗ File not found: {file_path}")
            return False
        try:
            with open(pdf_path, 'rb') as f:
                # Send only the base name as the multipart filename, not the
                # caller's local directory structure.
                files = {'file': (pdf_path.name, f, 'application/pdf')}
                response = requests.post(
                    f"{self.base_url}/documents/upload",
                    files=files,
                    headers=self.auth_header,
                    timeout=self.timeout,
                )
            if response.status_code == 200:
                logging.info("✓ Upload successful")
                result = response.json()
                logging.info(f"Upload result: {json.dumps(result, indent=2)}")
                return True
            logging.error(f"✗ Upload failed: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            logging.error(f"✗ Upload error: {e}")
            return False

    def test_search(self, query="OCR text extraction"):
        """POST a query to ``/search`` and log a summary of the response.

        Args:
            query: Search text sent in the JSON payload alongside ``top_k=5``.

        Returns:
            ``True`` if the server answered 200 and the response could be
            summarised; ``False`` otherwise.
        """
        logging.info(f"Testing search with query: '{query}'")
        try:
            payload = {
                "query": query,
                "top_k": 5,
            }
            response = requests.post(
                f"{self.base_url}/search",
                json=payload,
                headers=self.auth_header,
                timeout=self.timeout,
            )
            if response.status_code == 200:
                logging.info("✓ Search successful")
                result = response.json()
                logging.info(f"Search results: {len(result.get('results', []))} items")
                # Log the first 200 characters of the first result, if any.
                if result.get('results'):
                    first_result = result['results'][0]
                    logging.info(f"First result: {first_result.get('content', '')[:200]}...")
                return True
            logging.error(f"✗ Search failed: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            logging.error(f"✗ Search error: {e}")
            return False

    def check_document_status(self):
        """GET ``/documents`` and log the name and status of each entry.

        Returns:
            ``True`` if the server answered 200 and every entry could be
            logged; ``False`` otherwise.
        """
        logging.info("Checking document status...")
        try:
            response = requests.get(
                f"{self.base_url}/documents",
                headers=self.auth_header,
                timeout=self.timeout,
            )
            if response.status_code == 200:
                # NOTE(review): assumes the body is a JSON list of objects with
                # 'name' and 'status' keys; confirm against the server API.
                documents = response.json()
                logging.info(f"Found {len(documents)} documents")
                for doc in documents:
                    logging.info(f"Document: {doc.get('name', 'Unknown')} - Status: {doc.get('status', 'Unknown')}")
                return True
            logging.error(f"✗ Status check failed: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            logging.error(f"✗ Status check error: {e}")
            return False
def main():
    """Run the smoke test: health check, upload, status check, then searches.

    The status check and the three searches run only when the upload
    succeeds; a failed upload is reported instead of being skipped silently.
    The closing summary is always logged. Returns ``None``.
    """
    logging.info("Starting OCR PDF Upload & Search Test with Authentication")
    logging.info("=" * 60)

    # Initialize tester
    tester = LightRAGOCRTest()

    # Test server health
    tester.test_server_health()

    # Upload OCR PDF
    if tester.upload_ocr_pdf():
        # Fixed pause to give the server time to index the document before
        # it is queried.
        logging.info("Waiting 10 seconds for indexing to complete...")
        time.sleep(10)

        # Check document status
        tester.check_document_status()

        # Test search
        for query in ("OCR", "text extraction", "document processing"):
            tester.test_search(query)
    else:
        logging.error("✗ Upload failed; skipping document status check and search tests")

    logging.info("=" * 60)
    logging.info("TEST COMPLETED")
    # Report the values the tester actually used instead of duplicated
    # literals. The password is deliberately not written to the log.
    logging.info(f"Web UI: {tester.base_url}/webui/")
    logging.info(f"Username: {tester.username}")
if __name__ == "__main__":
    # Run the smoke test only when executed as a script, not on import.
    main()