#!/usr/bin/env python3
"""
Simple OCR PDF Test without Authentication
Tests core OCR functionality by temporarily disabling auth or using direct methods

The script talks to a server at ``BASE_URL`` and runs, in order:

1. a reachability check of a few basic endpoints,
2. a sanity check of the local PDF at ``OCR_PDF_PATH``,
3. an upload of that PDF to ``/documents/upload``,
4. a search request against ``/search``,
5. a printed checklist for manual Web UI testing.

The process exits with status 1 if any automated step fails.
"""

import base64
import json
import os
import sys
import time

import requests

# Configuration
BASE_URL = "http://localhost:3015"
OCR_PDF_PATH = "ocr.pdf"
TEST_QUERY = "document processing"

# Credentials sent with the upload and search requests and echoed in the
# manual-testing hints. Kept in one place so the requests and the log
# messages cannot drift apart.
# NOTE(review): these are hard-coded in source; confirm they are test-only.
AUTH_USERNAME = "jleu3482"
AUTH_PASSWORD = "jleu1212"

# Web UI address, derived from BASE_URL so the hints follow the configuration.
WEBUI_URL = f"{BASE_URL}/webui/"


def log_step(message: str, status: str = "INFO") -> None:
    """Print *message* prefixed with a local timestamp and a status tag.

    Args:
        message: Text to print.
        status: Tag shown in brackets after the timestamp (default ``"INFO"``).
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{status}] {message}")


def test_basic_endpoints() -> bool:
    """Check that the basic server endpoints answer with HTTP 200.

    Stops at the first endpoint that fails.

    Returns:
        True if every endpoint returned status 200; False on the first
        non-200 response or request error.
    """
    log_step("Testing basic server endpoints...")

    endpoints = [
        "/",
        "/health",
        "/webui/",
    ]

    for endpoint in endpoints:
        try:
            response = requests.get(f"{BASE_URL}{endpoint}", timeout=5)
        except requests.RequestException as e:
            log_step(f"✗ {endpoint}: {e}", "ERROR")
            return False

        if response.status_code != 200:
            # Report the unexpected status as a failure, not a success tick.
            log_step(f"✗ {endpoint}: {response.status_code}", "ERROR")
            return False
        log_step(f"✓ {endpoint}: {response.status_code}")

    return True


def verify_ocr_pdf() -> bool:
    """Check that the file at ``OCR_PDF_PATH`` exists and is not empty.

    Returns:
        True if the file exists with a size greater than zero, else False.
    """
    log_step("Verifying OCR PDF file...")

    if not os.path.exists(OCR_PDF_PATH):
        log_step(f"✗ OCR PDF file not found: {OCR_PDF_PATH}", "ERROR")
        return False

    file_size = os.path.getsize(OCR_PDF_PATH)
    if file_size == 0:
        log_step("✗ OCR PDF file is empty", "ERROR")
        return False

    log_step(f"✓ OCR PDF file verified ({file_size} bytes)")
    return True


def test_direct_upload() -> bool:
    """Upload the OCR PDF to ``/documents/upload`` as a multipart form.

    The request carries the configured username/password via the
    ``auth=`` argument of ``requests.post``.

    Returns:
        True if the server answered 200 or 201 with a JSON body; False if
        the PDF is missing/empty, the status is anything else, or the
        request, file read, or JSON decoding fails.
    """
    log_step("Testing direct file upload...")

    if not verify_ocr_pdf():
        return False

    upload_endpoint = f"{BASE_URL}/documents/upload"

    try:
        with open(OCR_PDF_PATH, "rb") as file:
            files = {
                "file": (os.path.basename(OCR_PDF_PATH), file, "application/pdf"),
            }
            log_step(f"Uploading to: {upload_endpoint}")
            response = requests.post(
                upload_endpoint,
                files=files,
                auth=(AUTH_USERNAME, AUTH_PASSWORD),
                timeout=30,
            )

        if response.status_code not in (200, 201):
            log_step(
                f"✗ Upload failed: {response.status_code} - {response.text}",
                "ERROR",
            )
            return False

        result = response.json()
        log_step("✓ OCR PDF upload successful")
        log_step(f"Response: {json.dumps(result, indent=2)}")
        return True
    except (requests.RequestException, OSError, ValueError) as e:
        # OSError: the PDF could not be opened/read.
        # ValueError: the response body was not valid JSON.
        log_step(f"✗ Upload failed: {e}", "ERROR")
        return False


def test_search_with_auth() -> bool:
    """Send ``TEST_QUERY`` to ``/search`` and log a preview of the results.

    Up to three results are previewed, each truncated to 100 characters.

    Returns:
        True if the server answered 200 with a non-empty JSON list; False
        for any other status, an empty or non-list body, or a request or
        JSON decoding error.
    """
    log_step("Testing search functionality...")

    search_payload = {
        "query": TEST_QUERY,
        "top_k": 5,
    }
    search_endpoint = f"{BASE_URL}/search"

    try:
        log_step(f"Searching via: {search_endpoint}")
        response = requests.post(
            search_endpoint,
            json=search_payload,
            auth=(AUTH_USERNAME, AUTH_PASSWORD),
            timeout=15,
        )

        if response.status_code != 200:
            log_step(
                f"✗ Search failed: {response.status_code} - {response.text}",
                "ERROR",
            )
            return False

        results = response.json()
        log_step("✓ Search request successful")

        if not (isinstance(results, list) and results):
            log_step("✗ Search returned no results", "WARNING")
            return False

        log_step(f"✓ Search returned {len(results)} results")
        for i, result in enumerate(results[:3]):
            if isinstance(result, dict):
                content = result.get("content", result.get("text", str(result)))
            else:
                content = result
            # The field may hold a non-string JSON value (null, number,
            # nested object); coerce so the slice below cannot raise.
            content = str(content)
            content_preview = content[:100] + "..." if len(content) > 100 else content
            log_step(f"Result {i+1}: {content_preview}")
        return True
    except (requests.RequestException, ValueError) as e:
        # ValueError: the response body was not valid JSON.
        log_step(f"✗ Search failed: {e}", "ERROR")
        return False


def test_webui_workflow() -> bool:
    """Print the manual Web UI test checklist.

    Performs no requests; it only logs the Web UI address, the login
    credentials, and the steps a person has to carry out by hand.

    Returns:
        Always True.
    """
    log_step("Testing Web UI workflow...")

    log_step(f"✓ Web UI accessible at: {WEBUI_URL}")
    log_step(f"✓ Login credentials: {AUTH_USERNAME} / {AUTH_PASSWORD}")
    log_step(f"✓ Manual test required: Upload {OCR_PDF_PATH} through web UI")
    log_step("✓ Manual test required: Verify indexing completes")
    log_step("✓ Manual test required: Test search functionality")

    return True


def main() -> None:
    """Run all checks, print a summary, and exit non-zero on failure.

    Connectivity and PDF verification are prerequisites: if either fails
    the process exits immediately with status 1. Upload and search are both
    attempted regardless of each other's outcome so the summary shows both
    results; if either failed the process exits with status 1 after the
    summary.

    Raises:
        SystemExit: With code 1 when any automated step fails.
    """
    log_step("Starting Simple OCR PDF Test")
    log_step("=" * 50)

    # Test basic connectivity
    if not test_basic_endpoints():
        log_step("✗ Basic connectivity test failed", "ERROR")
        sys.exit(1)

    # Test OCR PDF verification
    if not verify_ocr_pdf():
        log_step("✗ OCR PDF verification failed", "ERROR")
        sys.exit(1)

    # Test direct upload
    upload_success = test_direct_upload()

    # Test search
    search_success = test_search_with_auth()

    # Web UI workflow (informational only; prints the manual checklist)
    test_webui_workflow()

    # Final summary
    log_step("=" * 50)
    log_step("TEST RESULTS SUMMARY")
    log_step("=" * 50)

    # The first two lines are unconditional: reaching this point means both
    # prerequisite checks passed.
    log_step("Basic Connectivity: ✓ PASS")
    log_step("OCR PDF Verification: ✓ PASS")
    log_step(f"Direct Upload: {'✓ PASS' if upload_success else '✗ FAIL'}")
    log_step(f"Search Functionality: {'✓ PASS' if search_success else '✗ FAIL'}")
    log_step("Web UI Workflow: ✓ INFO (manual testing required)")

    if upload_success and search_success:
        log_step("🎉 CORE OCR FUNCTIONALITY VALIDATED!", "SUCCESS")
        log_step("The LightRAG system is working with OCR PDF processing")
        return

    log_step("⚠️ PARTIAL VALIDATION - Authentication issues detected", "WARNING")
    log_step("Manual testing via Web UI is recommended")
    log_step(f"Web UI: {WEBUI_URL}")
    log_step(f"Username: {AUTH_USERNAME}")
    log_step(f"Password: {AUTH_PASSWORD}")
    # Signal the failure to the caller/CI, matching the earlier failure paths.
    sys.exit(1)


if __name__ == "__main__":
    main()