# railseek6/debug_webui_ocr.py

#!/usr/bin/env python3
"""
Debug script to test Web UI OCR processor
"""

import os
import sys
import logging

# Configure root logging at DEBUG so that debug-level records emitted while
# this script runs are shown with a timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

def test_webui_processor():
    """Smoke-test the simple OCR processor the way the Web UI would load it.

    Steps, each reported on stdout:

    1. Put the directory three levels above this file on ``sys.path``
       (only if it is not already there).
    2. Import ``get_simple_ocr_processor`` from ``simple_ocr_processor``.
    3. Build the processor and print its ``available`` flag, its type and,
       when present, the type of its ``ocr_engine`` attribute.
    4. If ``extracted_images/image1.png`` exists (relative to the current
       working directory), run ``extract_text_from_image`` on it and print
       the text length, the confidence and the first 200 characters.

    Any exception raised from step 2 onwards is caught, printed, and its
    traceback dumped; the function always returns ``None``.
    """
    print("🧪 TESTING WEB UI OCR PROCESSOR")
    print("=" * 50)

    # Simulate Web UI environment: walk three directory levels up from
    # this file's absolute path to reach the workspace root.
    print("1. Adding workspace directory to sys.path...")
    workspace_dir = os.path.abspath(__file__)
    for _ in range(3):
        workspace_dir = os.path.dirname(workspace_dir)
    print(f"   Workspace dir: {workspace_dir}")

    if workspace_dir not in sys.path:
        sys.path.insert(0, workspace_dir)
        print("   ✅ Added workspace to sys.path")

    print("2. Importing simple_ocr_processor...")
    try:
        from simple_ocr_processor import get_simple_ocr_processor
        print("   ✅ Import successful")

        print("3. Initializing OCR processor...")
        ocr = get_simple_ocr_processor()
        print(f"   Processor available: {ocr.available}")
        print(f"   Processor type: {type(ocr)}")

        if not hasattr(ocr, 'ocr_engine'):
            print("   ❌ No OCR engine found!")
        else:
            print(f"   OCR Engine type: {type(ocr.ocr_engine)}")

        # Test with an image; bail out early when the sample is missing.
        image_path = "extracted_images/image1.png"
        if not os.path.exists(image_path):
            print(f"❌ Test image not found: {image_path}")
            return

        print(f"4. Testing OCR on {image_path}...")
        ocr_output = ocr.extract_text_from_image(image_path)
        text = ocr_output['text']
        print(f"   OCR Result: {len(text)} chars, confidence: {ocr_output['confidence']:.3f}")
        print(f"   Text: {text[:200]}..." if text else "   No text extracted")

    except Exception as err:
        # Top-level diagnostic boundary: report the failure and show where
        # it came from instead of letting the script die silently.
        print(f"❌ Error: {err}")
        import traceback
        traceback.print_exc()

# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    test_webui_processor()