#!/usr/bin/env python3
"""
Test OCR functionality with the fixed subprocess communication.
"""
|
|
|
|
import sys
|
|
import os
|
|
import tempfile
|
|
from PIL import Image, ImageDraw
|
|
|
|
# Add LightRAG to the import path.
# The checkout directory is resolved relative to the current working
# directory (not this file's location), so the lookup depends on where the
# script is launched from -- presumably the workspace root; verify.
workspace_dir = os.getcwd()
lightrag_path = os.path.join(workspace_dir, 'LightRAG-main')
if lightrag_path not in sys.path:
    # Prepend so this checkout wins over any other installed copy.
    sys.path.insert(0, lightrag_path)
|
|
|
|
def test_ocr_processor():
    """Smoke-test the LightRAG OCR processor and report progress on stdout.

    Imports ``get_document_processor`` from ``lightrag.document_processor``
    and inspects the returned processor's ``ocr_processor``.  When it reports
    OCR as available, a small generated image containing "Test OCR Text" is
    run through ``extract_text_from_image``; otherwise any stderr output of
    the OCR helper process is printed to aid diagnosis.

    Returns:
        The ``ocr_available`` flag of the OCR processor, or ``False`` when
        any step raised an exception (the error and its traceback are
        printed rather than propagated).
    """
    print('🧪 TESTING OCR PROCESSOR FIX')
    print('=' * 40)

    try:
        print('1. Importing document processor...')
        # Imported lazily so a missing/broken LightRAG install is reported
        # by the handler below instead of failing at module import time.
        from lightrag.document_processor import get_document_processor
        processor = get_document_processor()
        ocr = processor.ocr_processor
        print(f' ✅ OCR processor available: {ocr.ocr_available}')

        if ocr.ocr_available:
            print('2. Testing OCR on a simple image...')
            _run_ocr_on_sample_image(ocr)
        else:
            print('2. OCR not available, checking stderr...')
            _print_ocr_stderr(ocr)

        return ocr.ocr_available

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f'❌ Error: {e}')
        import traceback
        traceback.print_exc()
        return False


def _run_ocr_on_sample_image(ocr):
    """Render a tiny text image to a temp PNG, OCR it, and print the result."""
    # Create a simple test image with text
    img = Image.new('RGB', (200, 50), color='white')
    d = ImageDraw.Draw(img)
    d.text((10, 10), 'Test OCR Text', fill='black')

    # Only reserve a unique path here; the handle is closed before PIL
    # writes to it, so the file is never opened twice at once.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
        temp_path = f.name

    try:
        # Saving inside the try guarantees the temp file is removed even
        # when writing the image fails.
        img.save(temp_path)
        result = ocr.extract_text_from_image(temp_path)
        text = result.get('text', '').strip()
        confidence = result.get('confidence', 0)
        print(f' ✅ OCR test successful: "{text}"')
        print(f' 📊 Confidence: {confidence}')

        if text:
            print(' ✅ OCR is working correctly!')
        else:
            print(' ⚠️ OCR returned empty text')
    finally:
        os.unlink(temp_path)


def _print_ocr_stderr(ocr):
    """Print whatever the OCR helper process wrote to stderr, if readable."""
    proc = ocr._process
    if not proc:
        return

    # NOTE(review): ``_process`` is presumably a ``subprocess.Popen``; the
    # attribute lookups below are defensive in case it is not -- confirm.
    stream = getattr(proc, 'stderr', None)
    if stream is None:
        # stderr was not captured, so there is nothing to read.
        return

    # read() on a pipe blocks until EOF, i.e. until the child exits.  Only
    # drain the stream once the process has terminated so the test cannot hang.
    poll = getattr(proc, 'poll', None)
    if callable(poll) and poll() is None:
        print(' ⏳ OCR process still running; skipping stderr read')
        return

    stderr_output = stream.read()
    if stderr_output:
        print(f' 📋 OCR stderr: {stderr_output}')
|
|
|
|
if __name__ == "__main__":
    # Run the smoke test and summarise the outcome for the person (or CI
    # job) invoking the script.
    success = test_ocr_processor()
    if success:
        print('\n✅ OCR PROCESSOR IS WORKING!')
    else:
        print('\n❌ OCR PROCESSOR NEEDS FURTHER FIXING')
    # Reflect the result in the exit status so callers can detect failure
    # without parsing the output.
    sys.exit(0 if success else 1)