Files
railseek6/test_ocr_fix.py

72 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""
Test OCR functionality with the fixed subprocess communication
"""
import sys
import os
import tempfile
from PIL import Image, ImageDraw
# Make the LightRAG-main checkout importable. It is looked up relative to the
# current working directory, so the script has to be launched from the
# directory that contains LightRAG-main.
workspace_dir = os.getcwd()
lightrag_path = os.path.join(workspace_dir, 'LightRAG-main')
if sys.path.count(lightrag_path) == 0:
    # Front of the list so this checkout is searched before other entries.
    sys.path.insert(0, lightrag_path)
def test_ocr_processor():
    """Smoke-test the OCR processor exposed by the LightRAG document processor.

    Imports ``get_document_processor`` from ``lightrag.document_processor``,
    prints whether its ``ocr_processor`` reports itself as available and then:

    * if available, renders the text "Test OCR Text" into a small temporary
      PNG, runs ``extract_text_from_image`` on it and prints the recognised
      text and confidence;
    * otherwise, prints whatever the OCR backend process wrote to stderr
      (only when that can be read without blocking).

    Returns:
        The value of ``processor.ocr_processor.ocr_available``, or ``False``
        if any exception was raised (the error and traceback are printed).
    """
    print('🧪 TESTING OCR PROCESSOR FIX')
    print('=' * 40)
    try:
        print('1. Importing document processor...')
        # Imported lazily so a missing/broken LightRAG install is reported
        # by the except block below instead of crashing at module import.
        from lightrag.document_processor import get_document_processor
        processor = get_document_processor()
        ocr = processor.ocr_processor
        print(f' ✅ OCR processor available: {ocr.ocr_available}')
        if ocr.ocr_available:
            print('2. Testing OCR on a simple image...')
            # Create a simple test image with text
            img = Image.new('RGB', (200, 50), color='white')
            d = ImageDraw.Draw(img)
            d.text((10, 10), 'Test OCR Text', fill='black')
            # Only reserve a unique path here; the handle is closed before
            # the image is written to that path by name.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
                temp_path = f.name
            try:
                # Saving inside the try guarantees the temp file is removed
                # even when writing the image or running OCR fails.
                img.save(temp_path)
                result = ocr.extract_text_from_image(temp_path)
                text = result.get('text', '').strip()
                confidence = result.get('confidence', 0)
                print(f' ✅ OCR test successful: "{text}"')
                print(f' 📊 Confidence: {confidence}')
                if text:
                    print(' ✅ OCR is working correctly!')
                else:
                    print(' ⚠️ OCR returned empty text')
            finally:
                os.unlink(temp_path)
        else:
            print('2. OCR not available, checking stderr...')
            # NOTE(review): assumes _process is a subprocess.Popen-like
            # object (poll() / stderr) - confirm against the OCR processor.
            process = getattr(ocr, '_process', None)
            stderr_pipe = getattr(process, 'stderr', None) if process else None
            if stderr_pipe is not None:
                poll = getattr(process, 'poll', None)
                if callable(poll) and poll() is None:
                    # The child is still running: read() would block until
                    # it closes stderr, which could hang this script.
                    print(' ⏳ OCR process still running; skipping stderr read')
                else:
                    stderr_output = stderr_pipe.read()
                    if stderr_output:
                        print(f' 📋 OCR stderr: {stderr_output}')
        return ocr.ocr_available
    except Exception as e:
        # Top-level boundary of the smoke test: report and signal failure.
        print(f'❌ Error: {e}')
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Run the smoke test and print a single verdict line based on its result.
    verdict = (
        '\n✅ OCR PROCESSOR IS WORKING!'
        if test_ocr_processor()
        else '\n❌ OCR PROCESSOR NEEDS FURTHER FIXING'
    )
    print(verdict)