"""Direct OCR smoke test.

Prints PaddlePaddle's GPU status, then runs a single PDF through the LightRAG
document processor and reports whether text was extracted.
"""
import asyncio
import os
import sys
import traceback
from pathlib import Path

# Add LightRAG to path
# NOTE(review): this is a relative path, so the script presumably has to be
# launched from the directory that contains `LightRAG-main` -- confirm.
sys.path.append('LightRAG-main')


async def test_direct_ocr() -> bool:
    """Test OCR directly using the document processor.

    Imports ``get_document_processor`` from ``lightrag.document_processor``,
    awaits ``process_document`` on ``ocr.pdf`` (looked up relative to the
    current working directory) and prints a summary of the result.

    Returns:
        True when the result reports success; False when the PDF is missing,
        the import fails, processing reports failure, or any other exception
        is raised (the traceback is printed in that last case).
    """
    print("Testing direct OCR with GPU...")
    print("=" * 50)

    try:
        # Import the document processor lazily so a missing LightRAG checkout
        # is reported as a test failure instead of crashing at import time.
        from lightrag.document_processor import get_document_processor

        print("Initializing document processor...")
        processor = get_document_processor()

        # Test file
        ocr_pdf_path = "ocr.pdf"

        if not os.path.exists(ocr_pdf_path):
            print(f"Error: OCR PDF file not found at {ocr_pdf_path}")
            return False

        print(f"Processing OCR PDF: {ocr_pdf_path}")

        # Process the document directly
        result = await processor.process_document(ocr_pdf_path)

        # Normalise missing content to "" so that a failed result without any
        # content still reaches the "processing failed" branch below and shows
        # result.error, rather than raising TypeError inside len().
        content = result.content or ""

        print(f"Processing result: {result.success}")
        print(f"Error message: {result.error}")
        print(f"Content length: {len(content)}")

        if not result.success:
            print(f"❌ OCR processing failed: {result.error}")
            return False

        print("✅ OCR processing successful!")
        print(f"Extracted content preview: {content[:500]}...")

        # Check if GPU was used (the attribute is optional on the processor)
        if hasattr(processor, 'use_gpu'):
            print(f"GPU usage: {processor.use_gpu}")

        # Check for any fallback messages
        if "fallback" in content.lower():
            print("⚠️ WARNING: Fallback detected in content")

        return True

    except ImportError as e:
        print(f"❌ Import error: {e}")
        return False
    except Exception as e:
        # Top-level boundary of the test: report everything and fail cleanly.
        print(f"❌ Unexpected error: {e}")
        traceback.print_exc()
        return False


async def check_paddleocr_gpu() -> None:
    """Check if PaddleOCR is using GPU.

    Prints the installed Paddle version, whether it was compiled with CUDA,
    the current Paddle device, and the name of every GPU Paddle reports.
    Purely informational: a missing PaddlePaddle install or any other error
    is printed and swallowed so the OCR test can still run afterwards.
    """
    print("\nChecking PaddleOCR GPU status...")
    print("=" * 30)

    try:
        import paddle

        print(f"Paddle version: {paddle.__version__}")

        compiled_with_cuda = paddle.is_compiled_with_cuda()
        print(f"Paddle is compiled with CUDA: {compiled_with_cuda}")
        print(f"Paddle device: {paddle.get_device()}")

        # Query the device count once and reuse it for every report below.
        gpu_count = paddle.device.cuda.device_count()
        print(f"GPU available: {compiled_with_cuda and gpu_count > 0}")

        if gpu_count > 0:
            print(f"Number of GPUs: {gpu_count}")
            for i in range(gpu_count):
                print(f"GPU {i}: {paddle.device.cuda.get_device_name(i)}")
        else:
            print("No GPUs detected by PaddlePaddle")

    except ImportError:
        print("❌ PaddlePaddle not installed")
    except Exception as e:
        print(f"❌ Error checking PaddleOCR: {e}")


async def main() -> bool:
    """Main test function.

    Runs the GPU status check followed by the direct OCR test.

    Returns:
        The boolean outcome of ``test_direct_ocr``.
    """
    print("Direct OCR Test Suite")
    print("=" * 50)

    # Check PaddleOCR GPU status
    await check_paddleocr_gpu()

    print("\n" + "=" * 50)

    # Test direct OCR
    success = await test_direct_ocr()

    print("\n" + "=" * 50)
    if success:
        print("🎉 Direct OCR test completed successfully!")
    else:
        print("💥 Direct OCR test failed!")

    return success


if __name__ == "__main__":
    # Run the async tests and expose the outcome as the process exit status,
    # so shells and CI can tell a failed run from a successful one.
    sys.exit(0 if asyncio.run(main()) else 1)