"""
Fixed OCR and Image Classification with Complete Dependency Isolation

PaddleOCR and OpenCLIP each run in a separate Python process and report
results on stdout via a simple ``PREFIX:value`` line protocol, so their
heavyweight (and mutually conflicting) dependencies never need to be
importable from this process. No file-based JSON exchange is used.
"""

import asyncio
import os
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path


class FixedOCRProcessor:
    """OCR processor that shells out to PaddleOCR in a child process.

    The worker script prints ``OCR_RESULT:TEXT=``, ``OCR_RESULT:CONFIDENCE=``
    and ``OCR_RESULT:LINES=`` lines which are parsed back here.
    """

    # Availability probe: can the current interpreter import PaddleOCR?
    _PROBE_SCRIPT = """
import sys
try:
    from paddleocr import PaddleOCR
    print("OCR_READY")
except Exception as e:
    print(f"OCR_ERROR:{e}")
"""

    # Worker invoked as ``python -c <script> <image_path>``.  The image path
    # arrives via sys.argv[1] rather than being f-string-interpolated into
    # the source, so paths containing quotes/backslashes cannot break or
    # inject into the generated script.
    _WORKER_SCRIPT = """
import sys

from paddleocr import PaddleOCR

try:
    ocr = PaddleOCR(use_gpu=True, cls=True)
    result = ocr.ocr(sys.argv[1])
    if not result or not result[0]:
        print("OCR_RESULT:EMPTY")
        sys.exit(0)

    extracted_text = []
    total_confidence = 0.0
    line_count = 0
    for line in result[0]:
        try:
            # PaddleOCR rows vary by version: usually [bbox, (text, conf)],
            # but be defensive about the exact shape.
            if len(line) == 2:
                bbox, (text, confidence) = line
            elif len(line) >= 1:
                if len(line) > 1 and isinstance(line[1], (list, tuple)) and len(line[1]) >= 2:
                    text, confidence = line[1][0], line[1][1]
                elif len(line) > 1:
                    text, confidence = str(line[1]), 0.0
                else:
                    text, confidence = "", 0.0
            else:
                continue
            extracted_text.append(str(text) if text is not None else "")
            total_confidence += float(confidence) if isinstance(confidence, (int, float)) else 0.0
            line_count += 1
        except Exception:
            # Keep line accounting consistent even for malformed rows.
            extracted_text.append("")
            line_count += 1

    avg_confidence = total_confidence / line_count if line_count > 0 else 0.0
    full_text = " ".join(extracted_text)
    print(f"OCR_RESULT:TEXT={full_text}")
    print(f"OCR_RESULT:CONFIDENCE={avg_confidence}")
    print(f"OCR_RESULT:LINES={line_count}")
except Exception as e:
    print(f"OCR_ERROR:{e}")
"""

    def __init__(self):
        self.available = False
        self._initialize()

    def _initialize(self):
        """Probe for PaddleOCR in a child process and record availability."""
        try:
            result = subprocess.run(
                [sys.executable, "-c", self._PROBE_SCRIPT],
                capture_output=True, text=True, timeout=10,
            )
            if "OCR_READY" in result.stdout:
                self.available = True
                print("✅ OCR processor initialized successfully")
            else:
                print(f"❌ OCR initialization failed: {result.stderr}")
        except Exception as e:
            print(f"❌ OCR initialization failed: {e}")

    @staticmethod
    def _parse_ocr_output(stdout):
        """Parse the worker's OCR_RESULT:* stdout lines into a result dict.

        Returns {"text": str, "confidence": float, "line_count": int};
        missing or malformed fields fall back to empty/zero values.
        """
        text = ""
        confidence = 0.0
        line_count = 0
        for line in stdout.splitlines():
            if line.startswith("OCR_RESULT:TEXT="):
                # Slice off the prefix exactly once (str.replace would also
                # mangle any occurrence of the marker inside the OCR text).
                text = line[len("OCR_RESULT:TEXT="):].strip()
            elif line.startswith("OCR_RESULT:CONFIDENCE="):
                try:
                    confidence = float(line[len("OCR_RESULT:CONFIDENCE="):].strip())
                except ValueError:
                    confidence = 0.0
            elif line.startswith("OCR_RESULT:LINES="):
                try:
                    line_count = int(line[len("OCR_RESULT:LINES="):].strip())
                except ValueError:
                    line_count = 0
        return {"text": text, "confidence": confidence, "line_count": line_count}

    def extract_text_from_image(self, image_path):
        """Run OCR on *image_path* in a subprocess.

        Returns {"text", "confidence", "line_count"}; empty/zero values when
        OCR is unavailable, the file is missing, or the worker fails.
        """
        if not self.available or not os.path.exists(image_path):
            return {"text": "", "confidence": 0.0, "line_count": 0}
        try:
            result = subprocess.run(
                [sys.executable, "-c", self._WORKER_SCRIPT, image_path],
                capture_output=True, text=True, timeout=60,
            )
            return self._parse_ocr_output(result.stdout)
        except Exception as e:
            print(f"❌ OCR processing failed: {e}")
            return {"text": "", "confidence": 0.0, "line_count": 0}


class FixedImageClassifier:
    """Zero-shot image classifier that shells out to OpenCLIP.

    OpenCLIP lives in its own virtualenv (``openclip_env``) so its torch
    stack never has to coexist with PaddleOCR's dependencies.
    """

    # Availability probe for the venv interpreter.
    _PROBE_SCRIPT = """
try:
    import open_clip
    print("CLASSIFIER_READY")
except Exception as e:
    print(f"CLASSIFIER_ERROR:{e}")
"""

    # Worker invoked as ``python -c <script> <image_path> <top_k>``; inputs
    # come in via sys.argv instead of f-string interpolation.
    _WORKER_SCRIPT = """
import sys

import open_clip
import torch
from PIL import Image

try:
    image_path = sys.argv[1]
    top_k = int(sys.argv[2])

    # Load model
    model, _, processor = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k",
    )

    # Load and process image
    image = Image.open(image_path).convert("RGB")
    image_tensor = processor(image).unsqueeze(0)

    # Move to GPU if available
    if torch.cuda.is_available():
        model = model.cuda()
        image_tensor = image_tensor.cuda()

    with torch.no_grad():
        image_features = model.encode_image(image_tensor)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # Candidate labels for zero-shot classification
        text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a person",
            "a photo of a document", "a photo of a chart", "a photo of a diagram",
            "a photo of a table", "a photo of a graph", "a photo of a logo",
            "a photo of a signature", "a photo of a stamp", "a photo of a barcode",
        ]

        text_tokens = open_clip.tokenize(text_labels)
        if torch.cuda.is_available():
            text_tokens = text_tokens.cuda()
        text_features = model.encode_text(text_tokens)
        text_features /= text_features.norm(dim=-1, keepdim=True)

        # Cosine similarity -> softmax over labels, report top-k
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        values, indices = similarity[0].topk(top_k)
        for value, index in zip(values, indices):
            label = text_labels[index]
            confidence = float(value)
            print(f"CLASSIFICATION_RESULT:{label}|{confidence}")
except Exception as e:
    print(f"CLASSIFICATION_ERROR:{e}")
"""

    def __init__(self):
        self.available = False
        self._initialize()

    @staticmethod
    def _venv_python():
        """Path to the OpenCLIP virtualenv interpreter (platform-aware).

        On Windows this is the original ``openclip_env\\Scripts\\python.exe``;
        on POSIX systems it is ``openclip_env/bin/python``.
        """
        if os.name == "nt":
            return os.path.join("openclip_env", "Scripts", "python.exe")
        return os.path.join("openclip_env", "bin", "python")

    def _initialize(self):
        """Check the venv exists and that it can import open_clip."""
        try:
            venv_python = self._venv_python()
            if not os.path.exists(venv_python):
                print("❌ OpenCLIP virtual environment not found")
                return
            result = subprocess.run(
                [venv_python, "-c", self._PROBE_SCRIPT],
                capture_output=True, text=True, timeout=30,
            )
            if "CLASSIFIER_READY" in result.stdout:
                self.available = True
                print("✅ Image classifier initialized successfully")
            else:
                print(f"❌ Classifier initialization failed: {result.stderr}")
        except Exception as e:
            print(f"❌ Classifier initialization failed: {e}")

    @staticmethod
    def _parse_classification_output(stdout):
        """Parse ``CLASSIFICATION_RESULT:<label>|<confidence>`` lines.

        Malformed lines are skipped; returns a (possibly empty) list of
        {"label": str, "confidence": float} dicts.
        """
        prefix = "CLASSIFICATION_RESULT:"
        results = []
        for line in stdout.splitlines():
            if not line.startswith(prefix):
                continue
            parts = line[len(prefix):].split("|")
            if len(parts) != 2:
                continue
            try:
                results.append({"label": parts[0], "confidence": float(parts[1])})
            except ValueError:
                continue
        return results

    def classify_image(self, image_path, top_k=3):
        """Zero-shot classify *image_path* in the venv subprocess.

        Returns a list of {"label", "confidence"} dicts; a single sentinel
        entry when the classifier is unavailable or the worker fails.
        """
        if not self.available or not os.path.exists(image_path):
            return [{"label": "classification_unavailable", "confidence": 0.0}]
        try:
            result = subprocess.run(
                [self._venv_python(), "-c", self._WORKER_SCRIPT, image_path, str(top_k)],
                capture_output=True, text=True, timeout=30,
            )
            results = self._parse_classification_output(result.stdout)
            if results:
                return results
            return [{"label": "classification_failed", "confidence": 0.0}]
        except Exception as e:
            print(f"❌ Classification failed: {e}")
            return [{"label": "classification_error", "confidence": 0.0}]


class FixedDocumentProcessor:
    """Document processor combining isolated OCR and image classification."""

    def __init__(self):
        self.ocr_processor = FixedOCRProcessor()
        self.image_classifier = FixedImageClassifier()
        print("🎯 Fixed Document Processor Initialized")
        print(f" OCR: {'✅ Available' if self.ocr_processor.available else '❌ Not Available'}")
        print(f" Classifier: {'✅ Available' if self.image_classifier.available else '❌ Not Available'}")

    @staticmethod
    def _extract_docx_images(file_path, temp_dir):
        """Copy every ``word/media/`` image in the .docx into *temp_dir*.

        Returns the list of extracted image paths.
        """
        image_files = []
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            for file_info in zip_ref.filelist:
                if not file_info.filename.startswith('word/media/'):
                    continue
                image_filename = os.path.basename(file_info.filename)
                image_path = os.path.join(temp_dir, image_filename)
                with zip_ref.open(file_info.filename) as source, open(image_path, 'wb') as target:
                    target.write(source.read())
                image_files.append(image_path)
                print(f"📸 Extracted image: {image_path}")
        return image_files

    async def process_document(self, file_path):
        """Extract images from a .docx and run OCR + classification on each.

        Returns {"success", "content", "metadata", "images"}.
        NOTE(review): the subprocess calls are blocking, so this coroutine
        does not actually yield control while the workers run.
        """
        try:
            images = []
            content_parts = []

            with tempfile.TemporaryDirectory() as temp_dir:
                image_files = self._extract_docx_images(file_path, temp_dir)
                print(f"Found {len(image_files)} images in document")

                for i, image_path in enumerate(image_files):
                    image_metadata = {"index": i, "path": image_path}

                    # OCR processing
                    if self.ocr_processor.available:
                        ocr_result = self.ocr_processor.extract_text_from_image(image_path)
                        if ocr_result["text"].strip():
                            image_metadata["ocr_text"] = ocr_result["text"]
                            image_metadata["ocr_confidence"] = ocr_result["confidence"]
                            content_parts.append(f"[Image {i+1} OCR]: {ocr_result['text']}")
                            print(f"✅ Image {i+1} OCR: {len(ocr_result['text'])} chars")

                    # Image classification
                    if self.image_classifier.available:
                        classification_results = self.image_classifier.classify_image(image_path)
                        image_metadata["classification"] = classification_results
                        if classification_results and classification_results[0]["confidence"] > 0:
                            top_label = classification_results[0]["label"]
                            top_confidence = classification_results[0]["confidence"]
                            content_parts.append(
                                f"[Image {i+1} Classification]: {top_label} ({top_confidence:.3f})"
                            )
                            print(f"✅ Image {i+1} Classification: {top_label} ({top_confidence:.3f})")
                            # Check for bee
                            if "bee" in top_label.lower():
                                print(f"🎯 BEE DETECTED in image {i+1}!")

                    images.append(image_metadata)

            # Add some basic content at the top of the report
            content_parts.insert(0, f"Processed document: {os.path.basename(file_path)}")
            content_parts.insert(1, f"Total images: {len(images)}")

            return {
                "success": True,
                "content": "\n".join(content_parts),
                "metadata": {
                    "file_type": "word",
                    "images_count": len(images),
                    "processed_with_ocr": self.ocr_processor.available,
                    "processed_with_classification": self.image_classifier.available,
                },
                "images": images,
            }
        except Exception as e:
            print(f"❌ Document processing failed: {e}")
            return {
                "success": False,
                "content": "",
                "metadata": {"error": str(e)},
                "images": [],
            }


async def test_fixed_solution():
    """Smoke-test the fixed pipeline against a local test.docx."""
    print("🧪 TESTING FIXED SOLUTION")
    print("=" * 50)

    processor = FixedDocumentProcessor()

    test_file = "test.docx"
    if not os.path.exists(test_file):
        print(f"❌ Test file not found: {test_file}")
        return

    print(f"\n📄 PROCESSING: {test_file}")
    result = await processor.process_document(test_file)
    if not result["success"]:
        print(f"❌ Processing failed: {result['metadata'].get('error', 'Unknown error')}")
        return

    print(f"✅ Processing successful")
    print(f"📊 Metadata: {result['metadata']}")

    # Analyze results
    ocr_working = False
    classification_working = False
    bee_found = False
    for img in result["images"]:
        if "ocr_text" in img and img["ocr_text"].strip():
            ocr_working = True
        if "classification" in img and img["classification"] and img["classification"][0]["confidence"] > 0:
            classification_working = True
            if "bee" in img["classification"][0]["label"].lower():
                bee_found = True

    print(f"\n🎯 FINAL RESULTS:")
    print(f" OCR: {'✅ WORKING' if ocr_working else '❌ FAILED'}")
    print(f" Classification: {'✅ WORKING' if classification_working else '❌ FAILED'}")
    print(f" Bee Detection: {'✅ SUCCESS' if bee_found else '❌ NOT FOUND'}")
    print(f" Dependency Isolation: {'✅ ACHIEVED' if ocr_working and classification_working else '❌ FAILED'}")
    return result


if __name__ == "__main__":
    asyncio.run(test_fixed_solution())