234 lines
8.9 KiB
Python
234 lines
8.9 KiB
Python
"""
|
|
Isolated Image Classifier using subprocess to avoid dependency conflicts
|
|
This runs OpenCLIP in a separate process to avoid CUDA conflicts with PaddleOCR
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import tempfile
|
|
import subprocess
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
class IsolatedImageClassifier:
    """Image classifier that runs OpenCLIP in a separate process.

    The model is executed by the Python interpreter of a dedicated virtual
    environment (``openclip_gpu_env``, resolved relative to the current
    working directory) so that its dependencies do not conflict with the
    ones loaded in the calling process.
    """

    # Interpreter of the isolated virtual environment (relative path).
    _VENV_PYTHON = (
        os.path.join("openclip_gpu_env", "Scripts", "python.exe")
        if os.name == 'nt'
        else os.path.join("openclip_gpu_env", "bin", "python")
    )

    # Upper bound, in seconds, for each subprocess invocation.
    _TIMEOUT_SECONDS = 30

    # One-liner run in the virtual environment to verify OpenCLIP imports.
    _PROBE_SNIPPET = (
        'try: import open_clip; print("SUCCESS"); exit(0)\n'
        'except Exception as e: print(f"FAILED: {e}"); exit(1)'
    )

    # Worker executed inside the virtual environment. It is a *static*
    # script: the image path, result path and top_k arrive through sys.argv
    # instead of being interpolated into the source, so paths containing
    # backslashes or quotes can neither break nor inject code.
    _WORKER_SCRIPT = """\
import json
import os
import sys

image_path, result_path, top_k_arg = sys.argv[1:4]

try:
    top_k = int(top_k_arg)

    import open_clip
    import torch
    from PIL import Image

    # Check if image exists
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Load model
    model, _, processor = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k",
    )
    model.eval()

    # Load and process image
    image = Image.open(image_path).convert("RGB")
    image_tensor = processor(image).unsqueeze(0)

    # Common labels for document images
    text_labels = [
        "a photo of a bee", "a photo of a flower", "a photo of a person",
        "a photo of a document", "a photo of a chart", "a photo of a diagram",
        "a photo of a table", "a photo of a graph", "a photo of a logo",
        "a photo of a signature", "a photo of a stamp", "a photo of a barcode",
        "a photo of a QR code", "a photo of a screenshot", "a photo of a landscape",
        "a photo of an animal", "a photo of a building", "a photo of a vehicle",
        "a photo of food", "a photo of clothing", "a photo of electronics",
        "a photo of furniture", "a photo of nature", "a photo of art",
        "a photo of text", "a photo of numbers", "a photo of symbols",
    ]
    text_tokens = open_clip.tokenize(text_labels)

    # Move to GPU if available
    if torch.cuda.is_available():
        model = model.cuda()
        image_tensor = image_tensor.cuda()
        text_tokens = text_tokens.cuda()

    # Get predictions
    with torch.no_grad():
        image_features = model.encode_image(image_tensor)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features = model.encode_text(text_tokens)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)

    # Never ask for more predictions than there are labels
    k = max(0, min(top_k, len(text_labels)))
    values, indices = similarity[0].topk(k)
    results = [
        {"label": text_labels[index], "confidence": float(value)}
        for value, index in zip(values.tolist(), indices.tolist())
    ]
except Exception as e:
    results = [{"label": "error", "confidence": 0.0, "error": str(e)}]

# Save results (or the error record)
with open(result_path, "w", encoding="utf-8") as f:
    json.dump(results, f)
"""

    def __init__(self):
        """Create the classifier and verify the isolated environment.

        Raises:
            RuntimeError: If the virtual environment or OpenCLIP is missing.
        """
        self.available = False
        self._check_availability()

    def _check_availability(self):
        """Check if OpenCLIP is available in the isolated virtual environment - no fallbacks.

        Sets ``self.available`` to True on success.

        Raises:
            RuntimeError: If the interpreter is missing, the import probe
                fails, or the probe cannot be run (including a timeout).
        """
        try:
            venv_python = self._VENV_PYTHON
            if not os.path.exists(venv_python):
                raise RuntimeError(f"Virtual environment not found: {venv_python}")

            # Try to run a simple OpenCLIP import check in a subprocess
            probe = subprocess.run(
                [venv_python, '-c', self._PROBE_SNIPPET],
                capture_output=True,
                text=True,
                timeout=self._TIMEOUT_SECONDS,
                encoding='utf-8',
                errors='ignore',
            )
            if probe.returncode != 0:
                error_msg = probe.stderr or probe.stdout or "Unknown error"
                raise RuntimeError(f"OpenCLIP check failed: {error_msg}")

            self.available = True
            logger.info("OpenCLIP is available in isolated virtual environment")
        except Exception as e:
            logger.error("OpenCLIP availability check failed: %s", e)
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"OpenCLIP availability check failed: {e}") from e

    def classify_image(self, image_path: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Classify an image using an isolated subprocess.

        Args:
            image_path: Path to image file.
            top_k: Number of top predictions to return; values larger than
                the number of known labels are clamped by the worker.

        Returns:
            List of ``{"label": ..., "confidence": ...}`` dicts. On failure a
            single-element list is returned whose label is one of
            ``classification_unavailable``, ``classification_failed`` (the
            worker produced no output), ``classification_timeout`` or
            ``classification_error``. This method does not raise for
            classification failures.
        """
        if not self.available:
            return [{"label": "classification_unavailable", "confidence": 0.0}]

        # Bound up front so the cleanup in `finally` is safe even when
        # temp-file creation itself fails.
        script_path = None
        result_path = None

        try:
            # Reserve a file the worker will write its JSON results to
            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as result_file:
                result_path = result_file.name

            # Save the worker script to a temporary file
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.py', delete=False, encoding='utf-8'
            ) as script_file:
                script_path = script_file.name
                script_file.write(self._WORKER_SCRIPT)

            # Run the isolated classification with the virtual environment
            completed = subprocess.run(
                [
                    self._VENV_PYTHON,
                    script_path,
                    os.fspath(image_path),
                    result_path,
                    str(int(top_k)),
                ],
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='ignore',
                timeout=self._TIMEOUT_SECONDS,
            )

            # The result file was pre-created empty, so an empty file (not a
            # missing one) is what a crashed worker leaves behind.
            payload = ""
            if os.path.exists(result_path):
                with open(result_path, 'r', encoding='utf-8') as f:
                    payload = f.read()

            if not payload.strip():
                logger.error(
                    "No results file created by isolated classification "
                    "(exit code %s): %s",
                    completed.returncode,
                    (completed.stderr or "").strip(),
                )
                return [{"label": "classification_failed", "confidence": 0.0}]

            results = json.loads(payload)

            # Check for errors reported by the worker
            if results and 'error' in results[0]:
                logger.error("Isolated classification failed: %s", results[0]['error'])
                return [{"label": "classification_error", "confidence": 0.0}]

            return results

        except subprocess.TimeoutExpired:
            logger.error("Isolated classification timed out")
            return [{"label": "classification_timeout", "confidence": 0.0}]
        except Exception as e:
            logger.error("Isolated classification failed: %s", e)
            return [{"label": "classification_error", "confidence": 0.0}]
        finally:
            # Best-effort removal of the temporary files
            for temp_file in (script_path, result_path):
                if temp_file:
                    try:
                        os.unlink(temp_file)
                    except OSError:
                        pass
|
|
|
|
|
|
# Lazily created shared classifier; stays None until first requested.
_classifier_instance = None


def get_isolated_classifier() -> IsolatedImageClassifier:
    """Return the shared IsolatedImageClassifier, creating it on first use."""
    global _classifier_instance
    if _classifier_instance is not None:
        return _classifier_instance
    _classifier_instance = IsolatedImageClassifier()
    return _classifier_instance
|
|
|
|
|
|
def test_isolated_classifier():
    """Manually exercise the isolated classifier and print the outcome.

    Classifies the first sample image found in the current working
    directory, if any, and prints each predicted label with its confidence.
    """
    classifier = get_isolated_classifier()

    if not classifier.available:
        print("❌ Isolated image classifier is not available")
        return

    print("✅ Isolated image classifier is available")

    # Pick the first candidate sample image that exists on disk
    candidates = ("test_bee_image.png", "sample_image.jpg")
    sample = next((path for path in candidates if os.path.exists(path)), None)
    if sample is None:
        print("⚠️ No test images found for classification")
        return

    predictions = classifier.classify_image(sample)
    print(f"Classification results for {sample}:")
    for entry in predictions:
        print(f" {entry['label']}: {entry['confidence']:.4f}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
test_isolated_classifier() |