# Listing metadata: 150 lines, 6.0 KiB, Python
import logging
import os
import sys
from typing import Any, Dict

from paddleocr import PaddleOCR
# Configure logging
# basicConfig runs at import time and sets the root logger to INFO (it is a
# no-op when the root logger already has handlers configured).
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by everything defined in this file.
logger = logging.getLogger(__name__)
class SimpleOCRProcessor:
    """Simple OCR processor that uses GPU exclusively without fallback.

    The PaddleOCR engine is built during construction. If that fails a
    ``RuntimeError`` is raised, so a successfully constructed instance always
    has ``available`` set to True and ``ocr_engine`` set to the engine.
    """

    def __init__(self):
        # Flipped to True only after the engine has actually been created.
        self.available = False
        self.ocr_engine = None
        self._initialize()

    def _initialize(self):
        """Initialize PaddleOCR with GPU exclusively - no CPU fallback.

        Raises:
            RuntimeError: if the PaddleOCR constructor raises for any reason.
                The original exception is chained as ``__cause__``.
        """
        logger.info("Initializing PaddleOCR with GPU mode only")
        try:
            # Force GPU usage - no CPU fallback.
            # NOTE(review): these keyword names look like the PaddleOCR 2.x
            # constructor signature - confirm against the installed version.
            self.ocr_engine = PaddleOCR(
                use_gpu=True,
                use_angle_cls=True,
                lang='en',
                show_log=False,
                gpu_mem=2000,  # Limit GPU memory to avoid conflicts
            )
        except Exception as e:
            logger.error("❌ PaddleOCR GPU initialization failed: %s", e)
            self.ocr_engine = None
            self.available = False
            raise RuntimeError(f"PaddleOCR GPU initialization failed: {e}") from e
        self.available = True
        logger.info("✅ PaddleOCR GPU initialized successfully")

    @staticmethod
    def _empty_result() -> Dict[str, Any]:
        """Return a fresh "nothing recognised" result dictionary."""
        return {"text": "", "confidence": 0.0, "bboxes": [], "line_count": 0}

    @staticmethod
    def _parse_line(line):
        """Split one raw OCR entry into ``(bbox, text, confidence)``.

        The standard entry is ``[bbox, (text, confidence)]``. Entries whose
        second item is not a sequence of at least two items are treated as
        bare text with no confidence. Missing parts are returned as ``None``.

        Raises:
            TypeError, IndexError: if ``line`` is not an indexable sequence.
        """
        bbox = line[0]
        if len(line) < 2:
            return bbox, None, None
        payload = line[1]
        # Check the type before unpacking: a plain 2-character string would
        # otherwise be split into (text, confidence) character by character.
        if isinstance(payload, (list, tuple)) and len(payload) >= 2:
            return bbox, payload[0], payload[1]
        return bbox, payload, None

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Convert a raw confidence value to ``float``, using 0.0 on failure."""
        if value is None:
            return 0.0
        try:
            # float() accepts ints, floats, numeric strings and any object
            # implementing __float__, not just the built-in numeric types.
            return float(value)
        except (TypeError, ValueError):
            if isinstance(value, str):
                logger.warning(
                    "Could not convert confidence string to float: %s", value
                )
            else:
                logger.warning(
                    "Unexpected confidence type: %s, value: %s", type(value), value
                )
            return 0.0

    def extract_text_from_image(self, image_path) -> Dict[str, Any]:
        """Extract text from image using OCR.

        Args:
            image_path: path of the image file handed to the OCR engine.

        Returns:
            A dict with four keys:
              * ``"text"`` - recognised lines joined with newlines,
              * ``"confidence"`` - mean confidence over the counted lines,
              * ``"bboxes"`` - one bounding box per counted line,
              * ``"line_count"`` - number of counted lines.
            The empty result (``""``, ``0.0``, ``[]``, ``0``) is returned when
            the engine is unavailable, the file does not exist, nothing was
            recognised, or any unexpected error occurs. This method does not
            raise.
        """
        if not self.available or not self.ocr_engine:
            return self._empty_result()

        try:
            # Check if image exists
            if not os.path.exists(image_path):
                logger.warning("Image not found: %s", image_path)
                return self._empty_result()

            # Perform OCR
            result = self.ocr_engine.ocr(image_path)
            if not result or not result[0]:
                return self._empty_result()

            extracted_text = []
            bboxes = []
            total_confidence = 0.0

            for line in result[0]:
                try:
                    if len(line) == 0:
                        # Nothing usable in this entry; it is not counted.
                        continue
                    bbox, text, confidence = self._parse_line(line)
                    text_str = "" if text is None else str(text)
                    confidence_float = self._coerce_confidence(confidence)
                except (TypeError, ValueError, IndexError) as e:
                    logger.warning(
                        "Type conversion error in OCR line processing: %s", e
                    )
                    # Keep a placeholder so the malformed line is still
                    # counted (with zero confidence) rather than dropped.
                    bbox, text_str, confidence_float = [], "", 0.0

                extracted_text.append(text_str)
                bboxes.append(bbox)
                total_confidence += confidence_float

            line_count = len(extracted_text)
            avg_confidence = total_confidence / line_count if line_count else 0.0
            full_text = "\n".join(extracted_text)

            logger.info(
                "OCR extracted %d characters with confidence %.3f",
                len(full_text),
                avg_confidence,
            )

            return {
                "text": full_text,
                "confidence": avg_confidence,
                "bboxes": bboxes,
                "line_count": line_count,
            }
        except Exception as e:
            # Best-effort API: never propagate, but keep the traceback in the
            # log so swallowed failures can still be diagnosed.
            logger.exception("OCR processing failed: %s", e)
            return self._empty_result()
# Singleton
# Holds the shared SimpleOCRProcessor once get_simple_ocr_processor() has
# created it; None until then.
_ocr_instance = None


def get_simple_ocr_processor():
    """Return the shared SimpleOCRProcessor, creating it on first use.

    The instance is cached in the module-level ``_ocr_instance``. If
    construction raises, nothing is cached and the exception propagates, so
    the next call attempts construction again.
    """
    global _ocr_instance
    if _ocr_instance is None:
        _ocr_instance = SimpleOCRProcessor()
    return _ocr_instance
def _run_self_test(test_image="extracted_images/image1.png"):
    """Smoke-test the OCR processor from the command line.

    Prints whether the processor could be created and, when ``test_image``
    exists, a short summary of the OCR result for it.

    Returns:
        0 when the processor is available, 1 when it could not be created.
    """
    try:
        processor = get_simple_ocr_processor()
    except RuntimeError:
        # Initialization failure is reported by raising, so without this
        # handler the failure message below could never be printed.
        processor = None

    if processor is None or not processor.available:
        print("❌ Simple OCR processor failed")
        return 1

    print("✅ Simple OCR processor is working")
    # Test with an image
    if os.path.exists(test_image):
        result = processor.extract_text_from_image(test_image)
        print(f"OCR Result: {len(result['text'])} chars, confidence: {result['confidence']:.3f}")
        if result['text']:
            print(f"Text: {result['text'][:100]}...")
    return 0


if __name__ == "__main__":
    sys.exit(_run_self_test())