Files
railseek6/test_ocr_no_auth_simple.py

200 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""
Simple OCR PDF Test without Authentication
Tests core OCR functionality by calling the server's HTTP endpoints directly (using HTTP basic auth) instead of going through the Web UI login
"""
import requests
import json
import time
import sys
import os
import base64
# Configuration
# Root URL of the server under test; every endpoint path below is appended to it.
BASE_URL = "http://localhost:3015"
# PDF uploaded by the upload test; a relative path, so it is resolved against
# the current working directory.
OCR_PDF_PATH = "ocr.pdf"
# Query string sent in the search test's request payload.
TEST_QUERY = "document processing"
def log_step(message, status="INFO"):
    """Print *message* to stdout, prefixed with the local time and *status*.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] [STATUS] message``.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print("[{}] [{}] {}".format(now, status, message))
def test_basic_endpoints():
    """Probe the server's basic GET endpoints.

    Requests ``/``, ``/health`` and ``/webui/`` under ``BASE_URL``, in that
    order, logging the status code of each. Probing stops at the first
    endpoint that raises or answers with anything other than 200.

    Returns:
        True if every endpoint answered 200, otherwise False.
    """
    log_step("Testing basic server endpoints...")

    for path in ("/", "/health", "/webui/"):
        try:
            reply = requests.get(f"{BASE_URL}{path}", timeout=5)
            log_step(f"{path}: {reply.status_code}")
        except Exception as exc:
            # Connection errors, timeouts, etc. all count as a failed probe.
            log_step(f"{path}: {exc}", "ERROR")
            return False

        if reply.status_code != 200:
            return False

    return True
def verify_ocr_pdf():
    """Check that the OCR test PDF exists as a non-empty regular file.

    Only the presence and size of ``OCR_PDF_PATH`` are checked; the file's
    contents are not inspected.

    Returns:
        True when ``OCR_PDF_PATH`` names a regular file larger than zero
        bytes; False otherwise (the reason is logged).
    """
    log_step("Verifying OCR PDF file...")

    # isfile() rather than exists(): a directory with the same name would
    # otherwise pass the check and report a non-zero size.
    if not os.path.isfile(OCR_PDF_PATH):
        log_step(f"✗ OCR PDF file not found: {OCR_PDF_PATH}", "ERROR")
        return False

    try:
        file_size = os.path.getsize(OCR_PDF_PATH)
    except OSError as e:
        # The file was removed or became inaccessible between the two checks.
        log_step(f"✗ OCR PDF file not accessible: {e}", "ERROR")
        return False

    if file_size == 0:
        log_step("✗ OCR PDF file is empty", "ERROR")
        return False

    log_step(f"✓ OCR PDF file verified ({file_size} bytes)")
    return True
def test_direct_upload():
    """Upload the OCR PDF to ``/documents/upload`` using HTTP basic auth.

    The file is first checked with ``verify_ocr_pdf()`` and then sent as a
    multipart ``file`` field with content type ``application/pdf``.

    Returns:
        True when the server answers 200 or 201, regardless of whether the
        response body is JSON; False when verification fails, the request
        raises, or any other status code is returned.
    """
    log_step("Testing direct file upload...")

    if not verify_ocr_pdf():
        return False

    # NOTE(review): credentials are hard-coded test values; consider moving
    # them to configuration.
    auth = ('jleu3482', 'jleu1212')
    upload_endpoint = f"{BASE_URL}/documents/upload"

    try:
        with open(OCR_PDF_PATH, 'rb') as file:
            files = {'file': (os.path.basename(OCR_PDF_PATH), file, 'application/pdf')}
            log_step(f"Uploading to: {upload_endpoint}")
            response = requests.post(
                upload_endpoint,
                files=files,
                auth=auth,
                timeout=30,
            )
    except Exception as e:
        log_step(f"✗ Upload failed: {e}", "ERROR")
        return False

    if response.status_code not in (200, 201):
        log_step(f"✗ Upload failed: {response.status_code} - {response.text}", "ERROR")
        return False

    log_step("✓ OCR PDF upload successful")
    try:
        result = response.json()
    except ValueError:
        # A success status with a non-JSON body is still a successful upload;
        # show the raw body instead of reporting a failure.
        log_step(f"Response (non-JSON): {response.text}")
    else:
        log_step(f"Response: {json.dumps(result, indent=2)}")
    return True
def test_search_with_auth():
    """POST ``TEST_QUERY`` to ``/search`` using HTTP basic auth.

    Logs a preview (at most 100 characters) of up to the first three results.
    For dict results the preview is taken from the ``content`` key, then the
    ``text`` key, then the dict itself.

    Returns:
        True when the server answers 200 with a non-empty JSON list; False
        when the request raises, the status is not 200, the body is not
        valid JSON, or the decoded body is not a non-empty list.
    """
    log_step("Testing search functionality...")

    search_payload = {
        "query": TEST_QUERY,
        "top_k": 5,
    }
    # NOTE(review): credentials are hard-coded test values; consider moving
    # them to configuration.
    auth = ('jleu3482', 'jleu1212')
    search_endpoint = f"{BASE_URL}/search"

    try:
        log_step(f"Searching via: {search_endpoint}")
        response = requests.post(
            search_endpoint,
            json=search_payload,
            auth=auth,
            timeout=15,
        )
        if response.status_code != 200:
            log_step(f"✗ Search failed: {response.status_code} - {response.text}", "ERROR")
            return False
        results = response.json()
    except Exception as e:
        log_step(f"✗ Search failed: {e}", "ERROR")
        return False

    log_step("✓ Search request successful")

    if not isinstance(results, list) or not results:
        log_step("✗ Search returned no results", "WARNING")
        return False

    log_step(f"✓ Search returned {len(results)} results")
    for i, result in enumerate(results[:3], start=1):
        if isinstance(result, dict):
            content = result.get('content', result.get('text', result))
        else:
            content = result
        # Coerce to str: a None or non-string field must not abort a
        # successful search while building the preview.
        content = str(content)
        content_preview = content[:100] + "..." if len(content) > 100 else content
        log_step(f"Result {i}: {content_preview}")
    return True
def test_webui_workflow():
    """Log the manual Web UI test checklist.

    Makes no requests; it only prints the Web UI address, the login
    credentials and the steps to carry out by hand.

    Returns:
        Always True.
    """
    log_step("Testing Web UI workflow...")

    checklist = (
        "✓ Web UI accessible at: http://localhost:3015/webui/",
        "✓ Login credentials: jleu3482 / jleu1212",
        "✓ Manual test required: Upload ocr.pdf through web UI",
        "✓ Manual test required: Verify indexing completes",
        "✓ Manual test required: Test search functionality",
    )
    for entry in checklist:
        log_step(entry)

    return True
def main():
    """Run all checks in order and log a summary.

    Connectivity and PDF verification are prerequisites: if either fails the
    process exits with status 1 immediately. Upload and search are both
    attempted regardless of each other's outcome. If either of them fails,
    the summary is logged and the process exits with status 1, so callers
    and CI can detect the failure.
    """
    log_step("Starting Simple OCR PDF Test")
    log_step("=" * 50)

    # Test basic connectivity
    if not test_basic_endpoints():
        log_step("✗ Basic connectivity test failed", "ERROR")
        sys.exit(1)

    # Test OCR PDF verification
    if not verify_ocr_pdf():
        log_step("✗ OCR PDF verification failed", "ERROR")
        sys.exit(1)

    # Test direct upload
    upload_success = test_direct_upload()

    # Test search
    search_success = test_search_with_auth()

    # Web UI workflow: informational only, its result is not part of the verdict.
    test_webui_workflow()

    # Final summary
    log_step("=" * 50)
    log_step("TEST RESULTS SUMMARY")
    log_step("=" * 50)
    # Reaching this point implies the two prerequisite checks passed.
    log_step("Basic Connectivity: ✓ PASS")
    log_step("OCR PDF Verification: ✓ PASS")
    log_step(f"Direct Upload: {'✓ PASS' if upload_success else '✗ FAIL'}")
    log_step(f"Search Functionality: {'✓ PASS' if search_success else '✗ FAIL'}")
    log_step("Web UI Workflow: ✓ INFO (manual testing required)")

    if upload_success and search_success:
        log_step("🎉 CORE OCR FUNCTIONALITY VALIDATED!", "SUCCESS")
        log_step("The LightRAG system is working with OCR PDF processing")
    else:
        log_step("⚠️ PARTIAL VALIDATION - Authentication issues detected", "WARNING")
        log_step("Manual testing via Web UI is recommended")
        log_step(f"Web UI: {BASE_URL}/webui/")
        log_step("Username: jleu3482")
        log_step("Password: jleu1212")
        # Signal the failure through the exit status as well as the log.
        sys.exit(1)


if __name__ == "__main__":
    main()