#!/usr/bin/env python3
"""Smoke-test the LightRAG OCR processor after the subprocess-communication fix.

The script expects to be run from the workspace root, i.e. the directory that
contains ``LightRAG-main``; that directory is pushed onto ``sys.path`` so the
``lightrag`` package can be imported.
"""

import sys
import os
import tempfile

from PIL import Image, ImageDraw

# Add LightRAG to path (resolved relative to the current working directory).
workspace_dir = os.getcwd()
lightrag_path = os.path.join(workspace_dir, 'LightRAG-main')
if lightrag_path not in sys.path:
    sys.path.insert(0, lightrag_path)


def test_ocr_processor():
    """Exercise the OCR processor end to end and report the outcome on stdout.

    When the processor reports OCR as available, a small white PNG with the
    text ``Test OCR Text`` is rendered to a temporary file, passed to
    ``extract_text_from_image`` and the recognised text and confidence are
    printed. When OCR is not available, whatever the OCR helper process wrote
    to stderr is printed instead (if it can be read without blocking).

    Returns:
        The processor's ``ocr_available`` flag, or ``False`` if any exception
        was raised along the way (the traceback is printed).
    """
    print('🧪 TESTING OCR PROCESSOR FIX')
    print('=' * 40)

    try:
        print('1. Importing document processor...')
        # Imported lazily so an import failure is reported by this test
        # instead of aborting the script at load time.
        from lightrag.document_processor import get_document_processor

        processor = get_document_processor()
        ocr = processor.ocr_processor
        print(f' ✅ OCR processor available: {ocr.ocr_available}')

        if ocr.ocr_available:
            print('2. Testing OCR on a simple image...')

            # Create a simple test image with text
            img = Image.new('RGB', (200, 50), color='white')
            d = ImageDraw.Draw(img)
            d.text((10, 10), 'Test OCR Text', fill='black')

            # Reserve a path only; the handle is closed before anything is
            # written so the file is never opened twice at the same time.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
                temp_path = f.name

            try:
                # Saving happens inside the try so a failed save still
                # removes the temporary file.
                img.save(temp_path)
                result = ocr.extract_text_from_image(temp_path)
                text = result.get('text', '').strip()
                confidence = result.get('confidence', 0)

                print(f' ✅ OCR test successful: "{text}"')
                print(f' 📊 Confidence: {confidence}')

                if text:
                    print(' ✅ OCR is working correctly!')
                else:
                    print(' ⚠️ OCR returned empty text')
            finally:
                os.unlink(temp_path)
        else:
            print('2. OCR not available, checking stderr...')
            proc = ocr._process
            if proc:
                # NOTE(review): _process is presumably a subprocess.Popen;
                # confirm against lightrag.document_processor.
                # read() blocks until EOF, so reading from a child that is
                # still alive could hang this test indefinitely.
                poll = getattr(proc, 'poll', None)
                still_running = callable(poll) and poll() is None
                stderr_stream = getattr(proc, 'stderr', None)

                if still_running:
                    print(' ⚠️ OCR process still running; skipping stderr read')
                elif stderr_stream is not None:
                    stderr_output = stderr_stream.read()
                    if stderr_output:
                        print(f' 📋 OCR stderr: {stderr_output}')

        return ocr.ocr_available

    except Exception as e:
        # Top-level boundary of the smoke test: report everything, never raise.
        print(f'❌ Error: {e}')
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = test_ocr_processor()
    if success:
        print('\n✅ OCR PROCESSOR IS WORKING!')
    else:
        print('\n❌ OCR PROCESSOR NEEDS FURTHER FIXING')