# railseek6/test_ocr_fix.py

#!/usr/bin/env python3
"""
Test OCR functionality with the fixed subprocess communication
"""

import sys
import os
import tempfile
from PIL import Image, ImageDraw

# Make the LightRAG checkout importable. The location is resolved against the
# current working directory, and it is only prepended when not already listed.
workspace_dir = os.getcwd()
lightrag_path = os.path.join(workspace_dir, 'LightRAG-main')
if sys.path.count(lightrag_path) == 0:
    sys.path.insert(0, lightrag_path)

def _report_ocr_stderr(ocr_processor):
    """Print what the OCR helper process wrote to stderr, without blocking.

    Args:
        ocr_processor: Object whose optional ``_process`` attribute is the
            helper process handle (presumably a ``subprocess.Popen`` --
            confirm against the processor implementation).
    """
    proc = getattr(ocr_processor, '_process', None)
    if not proc:
        return

    # read() on the pipe of a live process blocks until that process closes
    # its stderr, which would hang this script. Only drain the pipe once the
    # process has exited; handles without poll() are read unconditionally.
    poll = getattr(proc, 'poll', None)
    if callable(poll) and poll() is None:
        print('   ⚠️  OCR process still running; skipping stderr read')
        return

    stream = getattr(proc, 'stderr', None)
    if stream is None:
        return  # nothing to read from

    stderr_output = stream.read()
    if stderr_output:
        print(f'   📋 OCR stderr: {stderr_output}')


def _run_ocr_smoke_check(ocr_processor):
    """Run OCR on a small generated image and report the outcome.

    Args:
        ocr_processor: Object exposing ``extract_text_from_image(path)``,
            whose result is read with ``.get('text')`` / ``.get('confidence')``.

    Returns:
        bool: True when the recognised text is non-empty, False otherwise.
    """
    # Create a simple test image with text
    img = Image.new('RGB', (200, 50), color='white')
    ImageDraw.Draw(img).text((10, 10), 'Test OCR Text', fill='black')

    # Reserve a path and close the handle before writing to it, then keep the
    # save inside the try so the file is removed even if saving fails.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        temp_path = f.name

    try:
        img.save(temp_path)
        result = ocr_processor.extract_text_from_image(temp_path)
    finally:
        os.unlink(temp_path)

    # Tolerate an explicit None for 'text' as well as a missing key.
    text = (result.get('text') or '').strip()
    confidence = result.get('confidence', 0)

    if text:
        print(f'   ✅ OCR test successful: "{text}"')
    print(f'   📊 Confidence: {confidence}')

    if text:
        print('   ✅ OCR is working correctly!')
    else:
        print('   ⚠️  OCR returned empty text')

    return bool(text)


def test_ocr_processor():
    """Smoke-test the OCR processor exposed by the document processor.

    Imports ``get_document_processor`` from ``lightrag.document_processor``,
    and, when its OCR processor reports itself available, runs it on a
    generated image. When OCR is unavailable, any stderr output of the helper
    process is printed instead.

    Returns:
        bool: True only when OCR is available and returned non-empty text.
        False when OCR is unavailable, returned empty text, or any exception
        was raised (the traceback is printed).
    """
    print('🧪 TESTING OCR PROCESSOR FIX')
    print('=' * 40)

    try:
        print('1. Importing document processor...')
        from lightrag.document_processor import get_document_processor
        processor = get_document_processor()
        ocr = processor.ocr_processor
        print(f'   ✅ OCR processor available: {ocr.ocr_available}')

        if not ocr.ocr_available:
            print('2. OCR not available, checking stderr...')
            _report_ocr_stderr(ocr)
            return False

        print('2. Testing OCR on a simple image...')
        return _run_ocr_smoke_check(ocr)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fail.
        print(f'❌ Error: {e}')
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    # Run the smoke test and print a one-line verdict based on its result.
    if test_ocr_processor():
        verdict = '\n✅ OCR PROCESSOR IS WORKING!'
    else:
        verdict = '\n❌ OCR PROCESSOR NEEDS FURTHER FIXING'
    print(verdict)