200 lines
6.4 KiB
Python
200 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple OCR PDF Test without Authentication
|
|
Tests core OCR functionality by temporarily disabling auth or using direct methods
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
import time
|
|
import sys
|
|
import os
|
|
import base64
|
|
|
|
# Configuration shared by every check in this script.
BASE_URL: str = "http://localhost:3015"  # root URL of the server under test
OCR_PDF_PATH: str = "ocr.pdf"  # PDF to upload; a relative path resolves against the working directory
TEST_QUERY: str = "document processing"  # query text sent to the search endpoint
|
|
|
|
def log_step(message, status="INFO"):
    """Print ``message`` prefixed with the current local time and ``status``.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] [STATUS] message``.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    prefix = f"[{now}] [{status}]"
    print(f"{prefix} {message}")
|
|
|
|
def test_basic_endpoints():
    """Check that the server's basic endpoints answer with HTTP 200.

    Sends a GET request (5 second timeout) to ``/``, ``/health`` and
    ``/webui/`` under ``BASE_URL`` and stops at the first failure.

    Returns:
        bool: True if every endpoint answered with status 200; False as soon
        as one answers with another status or the request raises.
    """
    log_step("Testing basic server endpoints...")

    endpoints = ("/", "/health", "/webui/")

    for endpoint in endpoints:
        try:
            response = requests.get(f"{BASE_URL}{endpoint}", timeout=5)
        except requests.RequestException as e:
            log_step(f"✗ {endpoint}: {e}", "ERROR")
            return False

        if response.status_code != 200:
            # Report the failure as a failure; previously this case was
            # logged with a success tick right before returning False.
            log_step(f"✗ {endpoint}: {response.status_code}", "ERROR")
            return False

        log_step(f"✓ {endpoint}: {response.status_code}")

    return True
|
|
|
|
def verify_ocr_pdf():
    """Check that ``OCR_PDF_PATH`` names a non-empty regular file.

    Only existence and size are checked; the file contents are not inspected.

    Returns:
        bool: True if the file exists and is larger than zero bytes; False
        (after logging an error) otherwise.
    """
    log_step("Verifying OCR PDF file...")

    # isfile() rather than exists(): a directory with that name must not pass.
    if not os.path.isfile(OCR_PDF_PATH):
        log_step(f"✗ OCR PDF file not found: {OCR_PDF_PATH}", "ERROR")
        return False

    try:
        file_size = os.path.getsize(OCR_PDF_PATH)
    except OSError as e:
        # The file can disappear or become unreadable between the two checks.
        log_step(f"✗ OCR PDF file not accessible: {e}", "ERROR")
        return False

    if file_size == 0:
        log_step("✗ OCR PDF file is empty", "ERROR")
        return False

    log_step(f"✓ OCR PDF file verified ({file_size} bytes)")
    return True
|
|
|
|
def test_direct_upload():
    """Upload the OCR PDF to ``/documents/upload`` using HTTP basic auth.

    The file is first checked with ``verify_ocr_pdf()`` and then sent as the
    multipart field ``file`` with a 30 second timeout.

    Returns:
        bool: True if the server answers with status 200 or 201; False if the
        file check fails, the server answers with any other status, or
        opening the file / sending the request raises.
    """
    log_step("Testing direct file upload...")

    if not verify_ocr_pdf():
        return False

    # NOTE(review): credentials are hard-coded in the source; consider
    # loading them from the environment or a config file.
    auth = ('jleu3482', 'jleu1212')
    upload_endpoint = f"{BASE_URL}/documents/upload"

    try:
        with open(OCR_PDF_PATH, 'rb') as file:
            files = {'file': (os.path.basename(OCR_PDF_PATH), file, 'application/pdf')}
            log_step(f"Uploading to: {upload_endpoint}")
            response = requests.post(
                upload_endpoint,
                files=files,
                auth=auth,
                timeout=30,
            )
    except (OSError, requests.RequestException) as e:
        log_step(f"✗ Upload failed: {e}", "ERROR")
        return False

    if response.status_code not in (200, 201):
        log_step(f"✗ Upload failed: {response.status_code} - {response.text}", "ERROR")
        return False

    log_step("✓ OCR PDF upload successful")
    try:
        result = response.json()
    except ValueError:
        # A non-JSON body does not undo an accepted upload; show it verbatim
        # instead of reporting the upload as failed.
        log_step(f"Response: {response.text}")
    else:
        log_step(f"Response: {json.dumps(result, indent=2)}")
    return True
|
|
|
|
def _result_preview(result, limit=100):
    """Return a printable preview of one search result, cut to ``limit`` characters."""
    if isinstance(result, dict):
        content = result.get('content', result.get('text', result))
    else:
        content = result
    # Coerce to text: the field may hold None, a number or nested data, on
    # which len()/slicing would raise or give a meaningless preview.
    content = str(content)
    return content[:limit] + "..." if len(content) > limit else content


def test_search_with_auth():
    """POST ``TEST_QUERY`` to ``/search`` using HTTP basic auth and log a preview.

    The request body is ``{"query": TEST_QUERY, "top_k": 5}`` and the timeout
    is 15 seconds. Up to three results are previewed in the log.

    Returns:
        bool: True if the server answers with status 200 and a non-empty JSON
        list; False for any other status, a body that is not valid JSON, an
        empty or non-list payload, or a failed request.
    """
    log_step("Testing search functionality...")

    search_payload = {
        "query": TEST_QUERY,
        "top_k": 5,
    }
    # NOTE(review): credentials are hard-coded in the source; consider
    # loading them from the environment or a config file.
    auth = ('jleu3482', 'jleu1212')
    search_endpoint = f"{BASE_URL}/search"
    log_step(f"Searching via: {search_endpoint}")

    try:
        response = requests.post(
            search_endpoint,
            json=search_payload,
            auth=auth,
            timeout=15,
        )
    except requests.RequestException as e:
        log_step(f"✗ Search failed: {e}", "ERROR")
        return False

    if response.status_code != 200:
        log_step(f"✗ Search failed: {response.status_code} - {response.text}", "ERROR")
        return False

    try:
        results = response.json()
    except ValueError as e:
        log_step(f"✗ Search failed: response is not valid JSON ({e})", "ERROR")
        return False

    log_step("✓ Search request successful")

    if not isinstance(results, list) or not results:
        log_step("✗ Search returned no results", "WARNING")
        return False

    log_step(f"✓ Search returned {len(results)} results")
    for i, result in enumerate(results[:3], start=1):
        log_step(f"Result {i}: {_result_preview(result)}")
    return True
|
|
|
|
def test_webui_workflow():
    """Print the manual Web UI test instructions.

    No request is made and nothing is verified here; the function only logs
    the Web UI address, the login credentials and the steps a person should
    carry out by hand.

    Returns:
        bool: Always True.
    """
    log_step("Testing Web UI workflow...")
    # Derive the address and file name from the module configuration so the
    # instructions stay correct when BASE_URL or OCR_PDF_PATH change.
    log_step(f"✓ Web UI accessible at: {BASE_URL}/webui/")
    # NOTE(review): this writes the password to the log output.
    log_step("✓ Login credentials: jleu3482 / jleu1212")
    log_step(f"✓ Manual test required: Upload {OCR_PDF_PATH} through web UI")
    log_step("✓ Manual test required: Verify indexing completes")
    log_step("✓ Manual test required: Test search functionality")
    return True
|
|
|
|
def main():
    """Run all checks in order and print a pass/fail summary.

    Connectivity and PDF verification are preconditions: if either fails the
    process exits with status 1 immediately. Upload and search are then both
    attempted, the manual Web UI instructions are printed, and a summary is
    logged. The process exits with status 1 if upload or search failed, so
    callers such as CI jobs can detect the failure; otherwise the function
    returns normally.
    """
    log_step("Starting Simple OCR PDF Test")
    log_step("=" * 50)

    # Test basic connectivity
    if not test_basic_endpoints():
        log_step("✗ Basic connectivity test failed", "ERROR")
        sys.exit(1)

    # Test OCR PDF verification
    if not verify_ocr_pdf():
        log_step("✗ OCR PDF verification failed", "ERROR")
        sys.exit(1)

    # Run both checks even if the first one fails so the summary is complete.
    upload_success = test_direct_upload()
    search_success = test_search_with_auth()

    # Informational only: prints the manual Web UI steps.
    test_webui_workflow()

    # Final summary
    log_step("=" * 50)
    log_step("TEST RESULTS SUMMARY")
    log_step("=" * 50)
    # Reaching this point implies the two precondition checks passed.
    log_step("Basic Connectivity: ✓ PASS")
    log_step("OCR PDF Verification: ✓ PASS")
    log_step(f"Direct Upload: {'✓ PASS' if upload_success else '✗ FAIL'}")
    log_step(f"Search Functionality: {'✓ PASS' if search_success else '✗ FAIL'}")
    log_step("Web UI Workflow: ✓ INFO (manual testing required)")

    if upload_success and search_success:
        log_step("🎉 CORE OCR FUNCTIONALITY VALIDATED!", "SUCCESS")
        log_step("The LightRAG system is working with OCR PDF processing")
        return

    log_step("⚠️ PARTIAL VALIDATION - Authentication issues detected", "WARNING")
    log_step("Manual testing via Web UI is recommended")
    log_step(f"Web UI: {BASE_URL}/webui/")
    # NOTE(review): this writes the password to the log output.
    log_step("Username: jleu3482")
    log_step("Password: jleu1212")
    # Match the precondition failures above: a failed run must not exit 0.
    sys.exit(1)
|
|
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()