166 lines
5.7 KiB
Python
166 lines
5.7 KiB
Python
import requests
|
|
import json
|
|
import base64
|
|
import time
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Root logger setup: every record at INFO or above is emitted as
# "[timestamp] [LEVEL] message", with a second-resolution timestamp.
logging.basicConfig(
    format="[%(asctime)s] [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
|
|
|
|
class LightRAGOCRTest:
    """Smoke-test client for a server protected by HTTP Basic authentication.

    Each public method performs one check (health probes, PDF upload,
    search, document listing), reports the outcome through ``logging`` and
    returns ``True``/``False`` instead of raising, so a caller can chain
    the checks in a simple script.

    Args:
        base_url: Scheme, host and port of the server. A trailing ``/`` is
            stripped so that joining with an endpoint never yields ``//``.
        username: User name for the Basic ``Authorization`` header.
        password: Password for the Basic ``Authorization`` header.
        timeout: Value passed as ``timeout`` to every ``requests`` call
            (seconds, or a ``(connect, read)`` tuple). ``None`` waits
            indefinitely, which was the behavior before this parameter
            existed.
    """

    def __init__(self, base_url="http://localhost:3015", username="jleu3482",
                 password="jleu1212", timeout=60):
        self.base_url = base_url.rstrip("/")
        self.username = username
        self.password = password
        self.timeout = timeout
        self.auth_header = self._create_auth_header()

    def _create_auth_header(self):
        """Return the ``Authorization`` header dict for HTTP Basic auth."""
        credentials = f"{self.username}:{self.password}"
        encoded_credentials = base64.b64encode(credentials.encode("utf-8")).decode("ascii")
        return {"Authorization": f"Basic {encoded_credentials}"}

    def _request(self, method, endpoint, **kwargs):
        """Send an authenticated request to ``base_url + endpoint``.

        The auth header and the configured timeout are applied unless the
        caller overrides them explicitly in ``kwargs``.
        """
        kwargs.setdefault("headers", self.auth_header)
        # Without a timeout, requests blocks forever on an unresponsive server.
        kwargs.setdefault("timeout", self.timeout)
        return requests.request(method, f"{self.base_url}{endpoint}", **kwargs)

    def test_server_health(self):
        """Probe the root, health and web UI endpoints and log each status.

        Returns:
            ``True`` if every endpoint answered with HTTP 200, else ``False``.
        """
        logging.info("Testing server health endpoints...")

        endpoints = {
            "Root": "/",
            "Health": "/health",
            "WebUI": "/webui/",
        }

        all_ok = True
        for name, endpoint in endpoints.items():
            try:
                response = self._request("GET", endpoint)
            except Exception as e:
                # Best-effort probe: one unreachable endpoint must not stop
                # the remaining probes.
                logging.error(f"✗ {name} ({endpoint}): {e}")
                all_ok = False
                continue

            ok = response.status_code == 200
            status = "✓" if ok else "✗"
            logging.info(f"{status} {name} ({endpoint}): {response.status_code}")
            all_ok = all_ok and ok

        return all_ok

    def upload_ocr_pdf(self, file_path="ocr.pdf"):
        """Upload a PDF as multipart form data to ``/documents/upload``.

        Args:
            file_path: Path of the PDF to send.

        Returns:
            ``True`` on an HTTP 200 response; ``False`` if the path is not a
            regular file, the request fails, or any other status is returned.
        """
        logging.info(f"Uploading OCR PDF: {file_path}")

        pdf_path = Path(file_path)
        # is_file() also rejects directories, which exists() would let through.
        if not pdf_path.is_file():
            logging.error(f"✗ File not found: {file_path}")
            return False

        try:
            with pdf_path.open("rb") as f:
                # Send only the base name; the local directory layout is of
                # no use to the server and should not leak into the upload.
                files = {"file": (pdf_path.name, f, "application/pdf")}
                response = self._request("POST", "/documents/upload", files=files)

            if response.status_code == 200:
                logging.info("✓ Upload successful")
                result = response.json()
                logging.info(f"Upload result: {json.dumps(result, indent=2)}")
                return True

            logging.error(f"✗ Upload failed: {response.status_code} - {response.text}")
            return False

        except Exception as e:
            # Deliberately broad: this is a reporting boundary of a smoke test.
            logging.error(f"✗ Upload error: {e}")
            return False

    def test_search(self, query="OCR text extraction"):
        """POST ``query`` to ``/search`` and log how many results came back.

        Args:
            query: Search text; sent together with ``top_k`` fixed at 5.

        Returns:
            ``True`` on an HTTP 200 response whose body could be processed,
            otherwise ``False``.
        """
        logging.info(f"Testing search with query: '{query}'")

        try:
            payload = {
                "query": query,
                "top_k": 5,
            }
            response = self._request("POST", "/search", json=payload)

            if response.status_code != 200:
                logging.error(f"✗ Search failed: {response.status_code} - {response.text}")
                return False

            logging.info("✓ Search successful")
            # NOTE(review): assumes the body is a JSON object with a
            # "results" list of objects — confirm against the server API.
            result = response.json()
            logging.info(f"Search results: {len(result.get('results', []))} items")

            # Log a short preview of the first hit only.
            if result.get('results'):
                first_result = result['results'][0]
                logging.info(f"First result: {first_result.get('content', '')[:200]}...")

            return True

        except Exception as e:
            # Deliberately broad: this is a reporting boundary of a smoke test.
            logging.error(f"✗ Search error: {e}")
            return False

    def check_document_status(self):
        """GET ``/documents`` and log the name and status of each entry.

        Returns:
            ``True`` on an HTTP 200 response whose body could be processed,
            otherwise ``False``.
        """
        logging.info("Checking document status...")

        try:
            response = self._request("GET", "/documents")

            if response.status_code != 200:
                logging.error(f"✗ Status check failed: {response.status_code} - {response.text}")
                return False

            # NOTE(review): assumes the body is a JSON array of objects with
            # "name" and "status" keys — confirm against the server API.
            documents = response.json()
            logging.info(f"Found {len(documents)} documents")
            for doc in documents:
                logging.info(f"Document: {doc.get('name', 'Unknown')} - Status: {doc.get('status', 'Unknown')}")
            return True

        except Exception as e:
            # Deliberately broad: this is a reporting boundary of a smoke test.
            logging.error(f"✗ Status check error: {e}")
            return False
|
|
|
|
def main():
    """Run the smoke test: health probes, PDF upload, status check, searches.

    Results are reported through ``logging`` only; nothing is returned.
    """
    logging.info("Starting OCR PDF Upload & Search Test with Authentication")
    logging.info("=" * 60)

    # Initialize tester with its default server URL and credentials.
    tester = LightRAGOCRTest()

    # Test server health
    tester.test_server_health()

    # NOTE(review): the original indentation was lost; the status check and
    # the searches are assumed to be gated on a successful upload — confirm.
    if tester.upload_ocr_pdf():
        # Give the server a fixed grace period to index the document.
        logging.info("Waiting 10 seconds for indexing to complete...")
        time.sleep(10)

        # Check document status
        tester.check_document_status()

        # Test search
        for query in ("OCR", "text extraction", "document processing"):
            tester.test_search(query)
    else:
        # Make the skipped steps visible instead of ending silently.
        logging.warning("Upload did not succeed; skipping status check and searches")

    logging.info("=" * 60)
    logging.info("TEST COMPLETED")
    # Report what the tester actually used rather than duplicated literals,
    # so the summary cannot drift from the configured values.
    logging.info(f"Web UI: {tester.base_url}/webui/")
    logging.info(f"Username: {tester.username}")
    # The password is intentionally not written to the log in plaintext.
    logging.info("Password: (not logged)")
|
|
|
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()