#!/usr/bin/env python3
"""
Direct test of OCR fix with GPU mode

Run from the directory that contains both the ``LightRAG-main`` checkout and
``ocr.pdf``: both paths below are relative to the current working directory.
The process exit status is 0 when the check succeeds and 1 otherwise.
"""

import os
import sys
import traceback

# The lightrag package is imported from a local checkout, so the checkout
# directory has to be on sys.path *before* the import below is executed.
sys.path.append('LightRAG-main')

from lightrag.document_processor import OCRProcessor  # noqa: E402


def test_ocr_fix():
    """Run OCR on ``ocr.pdf`` with GPU mode requested and report the outcome.

    Progress and results are printed to stdout. Any exception raised while
    creating the processor or extracting text is caught, printed together
    with its traceback, and reported as a failure.

    Returns:
        bool: True only when more than 100 characters of text were
        extracted; False when OCR is unavailable, no text (or too little
        text) was extracted, or an exception occurred.
    """
    print("🧪 Testing OCR Fix with GPU Mode")
    print("=" * 50)

    try:
        # Initialize OCR with GPU
        print("🔧 Initializing OCR processor with GPU...")
        ocr = OCRProcessor(use_gpu=True)

        print(f"✅ OCR available: {ocr.ocr_available}")
        print(f"✅ Using GPU: {ocr.use_gpu}")

        if not ocr.ocr_available:
            print("❌ OCR not available, cannot proceed")
            return False

        # Test extraction on the PDF
        print("\n📄 Testing OCR extraction on ocr.pdf...")
        result = ocr.extract_text_from_image('ocr.pdf')
        text = result['text']

        print(f"✅ Extracted text length: {len(text)}")
        print(f"✅ Confidence: {result['confidence']}")
        print(f"✅ Line count: {result['line_count']}")

        if not text:
            print("❌ FAILED: No text extracted from PDF")
            return False

        print("\n📝 First 500 characters:")
        print("-" * 50)
        print(text[:500])
        print("-" * 50)

        # Check if we got meaningful content: anything at or below 100
        # characters is treated as too short to count as a real extraction.
        if len(text) > 100:
            print("🎉 SUCCESS: OCR extracted meaningful text from scanned PDF!")
            return True

        print("⚠️ WARNING: Text extracted but seems too short")
        return False

    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything and
        # turn the failure into a False result instead of crashing.
        print(f"❌ ERROR: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = test_ocr_fix()
    sys.exit(0 if success else 1)