# railseek6/simple_ocr_processor.py

import os
import logging
import sys
from paddleocr import PaddleOCR

# Configure logging.
# NOTE: basicConfig runs at import time and configures the root logger at INFO
# level; per the logging documentation it does nothing if the root logger
# already has handlers installed.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

class SimpleOCRProcessor:
    """OCR processor backed by PaddleOCR in GPU mode, with no CPU fallback.

    The engine is created eagerly in ``__init__``; construction raises
    ``RuntimeError`` if PaddleOCR cannot be initialized.

    Attributes:
        available: ``True`` while the engine is usable; set to ``False`` when
            initialization fails.
        ocr_engine: The ``PaddleOCR`` instance, or ``None`` if initialization
            failed.
    """

    def __init__(self):
        self.available = True
        self.ocr_engine = None
        self._initialize()

    def _initialize(self):
        """Create the PaddleOCR engine with GPU enabled.

        Raises:
            RuntimeError: If PaddleOCR cannot be constructed. The underlying
                exception is chained as ``__cause__``.
        """
        try:
            logger.info("Initializing PaddleOCR with GPU mode only")
            # Force GPU usage - no CPU fallback
            self.ocr_engine = PaddleOCR(
                use_gpu=True,
                use_angle_cls=True,
                lang='en',
                show_log=False,
                gpu_mem=2000,  # Limit GPU memory to avoid conflicts
            )
            logger.info("✅ PaddleOCR GPU initialized successfully")
        except Exception as e:
            logger.error(f"❌ PaddleOCR GPU initialization failed: {e}")
            self.ocr_engine = None
            self.available = False
            raise RuntimeError(f"PaddleOCR GPU initialization failed: {e}") from e

    @staticmethod
    def _empty_result():
        """Return a fresh "nothing recognized" result dict."""
        # Built on every call so callers never share the mutable ``bboxes`` list.
        return {"text": "", "confidence": 0.0, "bboxes": [], "line_count": 0}

    @staticmethod
    def _split_line(line):
        """Split one raw OCR line into ``(bbox, text, confidence)``.

        The first element is taken as the bounding box. The second element,
        when present, is either a ``(text, confidence, ...)`` sequence or a
        bare value that is used as the text with confidence ``0.0``.

        Returns:
            A ``(bbox, text, confidence)`` tuple with *text* and *confidence*
            still un-normalized, or ``None`` for an empty line.

        Raises:
            TypeError: If *line* is not a sized, indexable object.
        """
        if len(line) == 0:
            return None

        bbox = line[0]
        if len(line) == 1:
            return bbox, "", 0.0

        payload = line[1]
        if isinstance(payload, (list, tuple)):
            # Tolerate short or long payloads instead of failing the unpack.
            text = payload[0] if len(payload) >= 1 else ""
            confidence = payload[1] if len(payload) >= 2 else 0.0
            return bbox, text, confidence

        # Bare payload (e.g. a plain string): keep it as text, no confidence.
        return bbox, payload, 0.0

    @staticmethod
    def _to_confidence(confidence):
        """Coerce a raw confidence value to ``float``.

        Anything ``float()`` accepts is converted (numbers, numeric strings,
        objects defining ``__float__``). ``None`` and unconvertible values
        yield ``0.0``; the latter also log a warning.
        """
        if confidence is None:
            return 0.0
        try:
            return float(confidence)
        except (TypeError, ValueError):
            if isinstance(confidence, str):
                logger.warning(f"Could not convert confidence string to float: {confidence}")
            else:
                logger.warning(f"Unexpected confidence type: {type(confidence)}, value: {confidence}")
            return 0.0

    def extract_text_from_image(self, image_path):
        """Extract text from image using OCR.

        Args:
            image_path: Path of the image to process.

        Returns:
            A dict with keys ``"text"`` (recognized lines joined by newlines),
            ``"confidence"`` (mean per-line confidence), ``"bboxes"`` (one
            entry per line) and ``"line_count"``. An all-empty result is
            returned when the engine is unavailable, the image does not
            exist, nothing is recognized, or OCR raises; this method does not
            propagate OCR errors.
        """
        if not self.available or not self.ocr_engine:
            return self._empty_result()

        try:
            # Check if image exists
            if not os.path.exists(image_path):
                logger.warning(f"Image not found: {image_path}")
                return self._empty_result()

            # Perform OCR
            result = self.ocr_engine.ocr(image_path)

            if not result or not result[0]:
                return self._empty_result()

            extracted_text = []
            bboxes = []
            total_confidence = 0.0

            for line in result[0]:
                try:
                    parsed = self._split_line(line)
                    if parsed is None:
                        # Empty line: nothing to record.
                        continue
                    bbox, text, confidence = parsed
                    text_str = str(text) if text is not None else ""
                    confidence_float = self._to_confidence(confidence)
                except (TypeError, ValueError, IndexError) as e:
                    logger.warning(f"Type conversion error in OCR line processing: {e}")
                    # Keep a placeholder so the malformed line is still counted.
                    bbox, text_str, confidence_float = [], "", 0.0

                extracted_text.append(text_str)
                bboxes.append(bbox)
                total_confidence += confidence_float

            line_count = len(extracted_text)
            avg_confidence = total_confidence / line_count if line_count else 0.0

            full_text = "\n".join(extracted_text)

            logger.info(f"OCR extracted {len(full_text)} characters with confidence {avg_confidence:.3f}")

            return {
                "text": full_text,
                "confidence": avg_confidence,
                "bboxes": bboxes,
                "line_count": line_count
            }
        except Exception as e:
            # Best-effort API: report the failure and hand back an empty result.
            logger.error(f"OCR processing failed: {e}")
            return self._empty_result()

# Module-wide processor instance, created lazily on first request
_ocr_instance = None

def get_simple_ocr_processor():
    """Return the shared SimpleOCRProcessor, constructing it on first use.

    If construction raises, nothing is cached and the next call tries again.
    """
    global _ocr_instance
    if _ocr_instance is not None:
        return _ocr_instance
    _ocr_instance = SimpleOCRProcessor()
    return _ocr_instance

if __name__ == "__main__":
    # Manual smoke test: build the processor and, if the sample image is
    # present, run OCR on it and print a short summary.
    try:
        processor = get_simple_ocr_processor()
    except RuntimeError:
        # SimpleOCRProcessor raises RuntimeError when engine initialization
        # fails; report it below instead of dying with a traceback.
        processor = None

    if processor is None or not processor.available:
        print("❌ Simple OCR processor failed")
        sys.exit(1)

    print("✅ Simple OCR processor is working")
    # Test with an image
    test_image = "extracted_images/image1.png"
    if os.path.exists(test_image):
        result = processor.extract_text_from_image(test_image)
        print(f"OCR Result: {len(result['text'])} chars, confidence: {result['confidence']:.3f}")
        if result['text']:
            print(f"Text: {result['text'][:100]}...")