#!/usr/bin/env python3
"""
Production OCR Workflow Test
Tests the complete OCR PDF processing with PaddleOCR and GPU acceleration

The script runs three independent checks (database connectivity, PaddleOCR
GPU initialisation, and an end-to-end PDF upload against the local server),
prints a summary, and exits with status 0 when the database and workflow
checks pass, 1 otherwise.
"""

import json
import os
import sys
import time
from pathlib import Path

import requests

# NOTE(review): the emoji prefixes in the messages below look mis-encoded
# (UTF-8 bytes decoded with a legacy code page). They are reproduced verbatim
# because the intended glyphs cannot all be recovered unambiguously from the
# garbled text; confirm against the original file before repairing them.

# Base URL of the document server exercised by the workflow test.
SERVER_URL = "http://localhost:3015"

# PDF uploaded by the workflow test, relative to the current working directory.
OCR_PDF_PATH = "inputs/ocr.pdf"


def _server_is_accessible() -> bool:
    """Return True when a GET on the server root answers with HTTP 200."""
    print("\nšŸ” Testing server accessibility...")
    try:
        response = requests.get(f"{SERVER_URL}/", timeout=10)
    except Exception as e:
        print(f"āŒ Cannot connect to server: {e}")
        return False

    if response.status_code != 200:
        print(f"āŒ Server returned status: {response.status_code}")
        return False

    print("āœ… Server is accessible")
    return True


def _upload_pdf(pdf_file: str) -> bool:
    """Upload *pdf_file* to the server and return True on an HTTP 200 reply."""
    try:
        with open(pdf_file, 'rb') as f:
            files = {'file': (os.path.basename(pdf_file), f, 'application/pdf')}
            response = requests.post(
                f"{SERVER_URL}/documents/upload", files=files, timeout=30
            )

        if response.status_code != 200:
            print(f"āŒ Upload failed: {response.status_code} - {response.text}")
            return False

        upload_result = response.json()
        print(f"āœ… Upload successful: {json.dumps(upload_result, indent=2)}")
    except Exception as e:
        print(f"āŒ Upload error: {e}")
        return False

    return True


def _indexing_confirmed(file_name: str) -> bool:
    """Return True only when the server reports successfully indexed documents.

    Any other outcome (request error, non-200 reply, missing ``statuses`` key,
    a failure entry for *file_name*, or no verdict at all) returns False, so an
    unverified run is never reported as a pass.
    """
    print("\nšŸ“‹ Checking document status...")
    try:
        doc_response = requests.get(f"{SERVER_URL}/documents", timeout=10)
        if doc_response.status_code != 200:
            print(f"āŒ Failed to get document status: {doc_response.status_code}")
            return False

        doc_status = doc_response.json()
        print(f"šŸ“„ Documents in system: {json.dumps(doc_status, indent=2)}")

        if 'statuses' not in doc_status:
            print("āš ļø No document status available yet")
            return False

        statuses = doc_status['statuses']

        # NOTE(review): any non-empty 'success' entry counts as a pass; it is
        # not matched against file_name because the schema of success entries
        # is not visible here - confirm against the server's response format.
        if statuses.get('success'):
            print("šŸŽ‰ OCR processing successful! Documents indexed successfully.")
            return True

        for doc in statuses.get('failed') or []:
            if doc.get('file_path') == file_name:
                print(f"āŒ OCR processing failed: {doc.get('error_msg', 'Unknown error')}")
                return False
    except Exception as e:
        print(f"āŒ Error checking document status: {e}")
        return False

    # Statuses were returned, but nothing succeeded and our file is not listed
    # as failed (for example, it is still being processed).
    print(f"OCR processing not confirmed for {file_name}: no success or failure status reported")
    return False


def test_production_ocr_workflow() -> bool:
    """Test complete OCR workflow with production configuration.

    Checks that the server is reachable, uploads the OCR sample PDF, waits a
    fixed 30 seconds for processing, then queries the document list.

    Returns:
        True only when the server reports successfully indexed documents.
        False when the server is unreachable, the PDF is missing, the upload
        fails, or indexing cannot be confirmed.
    """
    print("šŸš€ Testing Production OCR Workflow with PaddleOCR and GPU Acceleration")
    print("=" * 60)

    if not _server_is_accessible():
        return False

    print("\nšŸ“ Testing OCR PDF upload...")
    pdf_file = OCR_PDF_PATH

    if not os.path.exists(pdf_file):
        print(f"āŒ OCR PDF file not found: {pdf_file}")
        return False

    print(f"šŸ“„ Using OCR PDF: {pdf_file}")

    if not _upload_pdf(pdf_file):
        return False

    # Processing is asynchronous on the server side; give it a fixed window
    # before asking for the result.
    print("\nā³ Waiting for OCR processing (30 seconds)...")
    time.sleep(30)

    return _indexing_confirmed(os.path.basename(pdf_file))


def test_paddleocr_gpu() -> bool:
    """Test PaddleOCR with GPU acceleration.

    Returns:
        True when Paddle is built with CUDA, at least one GPU device is
        visible, and PaddleOCR initialises with ``use_gpu=True`` (and, if
        ``test_ocr_image.png`` exists, OCR on it runs). False otherwise,
        including when paddle/paddleocr are not installed.
    """
    print("\nšŸ”¬ Testing PaddleOCR GPU Acceleration...")

    try:
        # Imported lazily so the rest of the script runs without Paddle.
        import paddleocr
        import paddle

        if not paddle.device.is_compiled_with_cuda():
            print("āŒ CUDA not available - using CPU fallback")
            return False

        # A CUDA-enabled build says nothing about the hardware actually
        # present; require at least one visible device before claiming GPU.
        gpu_count = paddle.device.cuda.device_count()
        if gpu_count < 1:
            print("āŒ CUDA not available - using CPU fallback")
            return False

        print("āœ… CUDA is available")
        print(f"āœ… GPU devices: {gpu_count}")

        try:
            ocr = paddleocr.PaddleOCR(use_angle_cls=True, lang='en', use_gpu=True)
            print("āœ… PaddleOCR initialized with GPU support")

            # Run a real inference only when a sample image is present.
            test_image = "test_ocr_image.png"
            if os.path.exists(test_image):
                result = ocr.ocr(test_image, cls=True)
                print(f"āœ… PaddleOCR test successful: {len(result)} results")
            else:
                print("āš ļø No test image available, but PaddleOCR initialized successfully")

            return True
        except Exception as e:
            print(f"āŒ PaddleOCR GPU initialization failed: {e}")
            return False

    except ImportError as e:
        print(f"āŒ PaddleOCR not installed: {e}")
        return False
    except Exception as e:
        print(f"āŒ GPU test error: {e}")
        return False


def test_database_connections() -> bool:
    """Test database connectivity.

    Probes Redis, Neo4j, Qdrant and PostgreSQL on localhost. Every backend is
    checked even if an earlier one fails, so the output lists all problems.

    Returns:
        True when all four backends respond, False if any check fails
        (including a missing client library).
    """
    print("\nšŸ—„ļø Testing Database Connections...")

    # NOTE(review): credentials below are hard-coded in source; consider
    # loading them from the environment or a secrets store.
    all_connected = True

    # Test Redis
    try:
        import redis
        r = redis.Redis(host='localhost', port=6379, db=1)
        r.ping()
        print("āœ… Redis connection successful")
    except Exception as e:
        print(f"āŒ Redis connection failed: {e}")
        all_connected = False

    # Test Neo4j
    try:
        from neo4j import GraphDatabase
        driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "jleu1212"))
        try:
            with driver.session() as session:
                session.run("RETURN 1")
        finally:
            # Release the driver's connection pool whether or not the query ran.
            driver.close()
        print("āœ… Neo4j connection successful")
    except Exception as e:
        print(f"āŒ Neo4j connection failed: {e}")
        all_connected = False

    # Test Qdrant (simple HTTP check)
    try:
        response = requests.get("http://localhost:6333", timeout=5)
        if response.status_code == 200:
            print("āœ… Qdrant connection successful")
        else:
            print(f"āŒ Qdrant connection failed: {response.status_code}")
            all_connected = False
    except Exception as e:
        print(f"āŒ Qdrant connection failed: {e}")
        all_connected = False

    # Test PostgreSQL
    try:
        import psycopg2
        conn = psycopg2.connect(
            host="localhost",
            port=5432,
            user="jleu3482",
            password="jleu1212",
            database="rag_anything"
        )
        conn.close()
        print("āœ… PostgreSQL connection successful")
    except Exception as e:
        print(f"āŒ PostgreSQL connection failed: {e}")
        all_connected = False

    return all_connected


def main() -> bool:
    """Run complete production OCR workflow test.

    Runs the database, PaddleOCR GPU and workflow checks in that order and
    prints a summary.

    Returns:
        True when both the database and the workflow checks pass. The GPU
        check is reported in the summary but does not affect the result.
    """
    print("šŸ­ Production OCR Workflow Test")
    print("=" * 60)

    # Test database connections
    db_success = test_database_connections()

    # Test PaddleOCR GPU
    ocr_success = test_paddleocr_gpu()

    # Test OCR workflow
    workflow_success = test_production_ocr_workflow()

    # Summary
    print("\n" + "=" * 60)
    print("šŸ“Š TEST SUMMARY")
    print("=" * 60)
    print(f"šŸ—„ļø Database Connections: {'āœ… PASS' if db_success else 'āŒ FAIL'}")
    print(f"šŸ”¬ PaddleOCR GPU: {'āœ… PASS' if ocr_success else 'āš ļø PARTIAL (CPU fallback)'}")
    print(f"šŸ“„ OCR Workflow: {'āœ… PASS' if workflow_success else 'āŒ FAIL'}")

    if db_success and workflow_success:
        print("\nšŸŽ‰ PRODUCTION OCR WORKFLOW TEST COMPLETED SUCCESSFULLY!")
        return True

    print("\nšŸ’„ PRODUCTION OCR WORKFLOW TEST FAILED!")
    return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)