"""Fix dependency isolation between PaddleOCR and OpenCLIP."""
|
|
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
from pathlib import Path
|
|
|
|
def check_current_dependencies():
    """Report which ML frameworks are currently importable.

    Prints the PyTorch version plus CUDA details when torch is present,
    and the PaddlePaddle version when paddle is present; prints a failure
    marker for whichever framework cannot be imported. Returns None.
    """
    print("🔍 Checking current dependencies...")

    try:
        import torch
    except ImportError:
        print("❌ PyTorch not installed")
    else:
        print(f"✅ PyTorch: {torch.__version__}")
        print(f"✅ CUDA: {torch.version.cuda}")
        print(f"✅ CUDA available: {torch.cuda.is_available()}")

    try:
        import paddle
    except ImportError:
        print("❌ PaddlePaddle not installed")
    else:
        print(f"✅ PaddlePaddle: {paddle.__version__}")
|
|
|
|
def create_isolated_openclip_environment():
    """Build a dedicated virtualenv holding PyTorch 2.1+ and OpenCLIP.

    Creates ``openclip_env`` with the current interpreter and installs
    the OpenCLIP dependency set into it, keeping those versions away
    from the PaddleOCR environment.

    Returns:
        str | None: Path to the venv's Python executable, or None when
        any subprocess step fails.
    """
    print("\n🔧 Creating isolated OpenCLIP environment...")

    venv_path = "openclip_env"

    try:
        # Build the virtual environment with the running interpreter.
        subprocess.run([sys.executable, "-m", "venv", venv_path], check=True)
        print(f"✅ Created virtual environment: {venv_path}")

        # Venv layout differs per OS: Scripts\python.exe vs bin/python.
        subdir, exe = (
            ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
        )
        python_exe = os.path.join(venv_path, subdir, exe)

        # Versions chosen so open-clip-torch gets PyTorch 2.1+.
        for package in (
            "torch>=2.1.0",
            "torchvision>=0.16.0",
            "open-clip-torch>=2.20.0",
            "Pillow>=10.0.0",
        ):
            print(f"📦 Installing {package}...")
            subprocess.run([python_exe, "-m", "pip", "install", package], check=True)

        print("✅ OpenCLIP environment setup complete")
        return python_exe

    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to setup OpenCLIP environment: {e}")
        return None
|
|
|
|
def create_isolated_image_classifier():
    """Write a standalone OpenCLIP classifier script to disk.

    The generated script is intended to be run with the isolated
    environment's interpreter; it classifies one image against a fixed
    label set and prints a JSON result to stdout so the parent process
    can parse it.

    Fixes over the previous version of the generated script: the model
    is switched to inference mode via ``model.eval()`` (OpenCLIP models
    are created in train mode), and unused imports (``os``, ``tempfile``,
    ``pathlib``) were removed.

    Returns:
        str: Path of the script that was written ("openclip_classifier.py").
    """
    classifier_code = '''
import sys
import json


def classify_image(image_path):
    """Classify image using OpenCLIP in isolated environment."""
    try:
        # Import inside the function so import failures surface as a
        # JSON error payload instead of a traceback.
        import open_clip
        import torch
        from PIL import Image

        # Prefer GPU when available.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"🔍 Using device: {device}")

        # Load model and preprocessing transforms.
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
        model = model.to(device)
        model.eval()  # inference mode: disable train-time behavior

        # Load and preprocess the input image as a 1-image batch.
        image = Image.open(image_path).convert('RGB')
        image = preprocess(image).unsqueeze(0).to(device)

        # Candidate labels (including bee-related classes).
        candidate_labels = [
            "a bee", "an insect", "an animal", "a flower", "a plant",
            "a bird", "a butterfly", "a dragonfly", "a bug", "a honeybee",
            "clipart", "cartoon", "illustration", "drawing", "logo"
        ]

        # Tokenize the text prompts.
        text = open_clip.tokenize(candidate_labels).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image)
            text_features = model.encode_text(text)

            # Cosine similarity via L2-normalized features, softmaxed
            # over the label set.
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)
            similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)

        # Top-3 predictions.
        values, indices = similarity[0].topk(3)

        results = []
        for value, idx in zip(values, indices):
            results.append({
                "label": candidate_labels[idx],
                "score": round(value.item(), 3)
            })

        return {
            "success": True,
            "predictions": results,
            "device": device
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "predictions": []
        }


if __name__ == "__main__":
    # Image path comes from the command line; always emit JSON.
    if len(sys.argv) > 1:
        result = classify_image(sys.argv[1])
        print(json.dumps(result))
    else:
        print(json.dumps({
            "success": False,
            "error": "No image path provided",
            "predictions": []
        }))
'''

    # Persist the script into the current working directory.
    script_path = "openclip_classifier.py"
    with open(script_path, "w", encoding="utf-8") as f:
        f.write(classifier_code)

    print(f"✅ Created isolated classifier script: {script_path}")
    return script_path
|
|
|
|
def fix_word_image_extraction():
    """Write a standalone .docx image-extraction script to disk.

    The generated script opens a .docx as a zip archive and copies every
    file stored under ``word/media/`` (where Word keeps embedded images)
    into an output directory.

    Fix over the previous version of the generated script: directory
    entries in the archive are now skipped (iterating ``filelist`` could
    "extract" a ``word/media/`` directory entry as a bogus empty file),
    using the public ``infolist()`` API plus an ``is_dir()`` guard.

    Returns:
        str: Path of the script that was written ("word_image_extractor.py").
    """
    fix_code = '''
import os
import zipfile
from pathlib import Path


def extract_images_from_docx(docx_path, output_dir):
    """Extract all images from a Word document using the zipfile method."""
    images = []

    try:
        # Create output directory if needed.
        os.makedirs(output_dir, exist_ok=True)

        # A .docx file is a zip archive; embedded images live in word/media/.
        with zipfile.ZipFile(docx_path, 'r') as zip_ref:
            for file_info in zip_ref.infolist():
                # Skip directory entries so only real files are copied.
                if file_info.is_dir():
                    continue
                if file_info.filename.startswith('word/media/'):
                    # Flatten the archive path to just the file name.
                    image_filename = Path(file_info.filename).name
                    image_path = os.path.join(output_dir, image_filename)

                    # Extract and save.
                    with zip_ref.open(file_info.filename) as source, open(image_path, 'wb') as target:
                        target.write(source.read())

                    images.append(image_path)
                    print(f"📸 Extracted image: {image_path}")

        return images

    except Exception as e:
        print(f"❌ Error extracting images: {e}")
        return []


if __name__ == "__main__":
    # Smoke test against a local document, if present.
    test_doc = "test.docx"
    if os.path.exists(test_doc):
        images = extract_images_from_docx(test_doc, "extracted_images")
        print(f"📊 Found {len(images)} images")
        for img in images:
            print(f" - {img}")
    else:
        print(f"❌ Test document not found: {test_doc}")
'''

    # Persist the script into the current working directory.
    script_path = "word_image_extractor.py"
    with open(script_path, "w", encoding="utf-8") as f:
        f.write(fix_code)

    print(f"✅ Created Word image extractor: {script_path}")
    return script_path
|
|
|
|
def main():
    """Run every dependency-isolation fix and print a summary."""
    print("🛠️ Fixing Dependency Isolation Issues")
    print("=" * 50)

    # Report what is installed before changing anything.
    check_current_dependencies()

    # Apply the three fixes in order.
    python_exe = create_isolated_openclip_environment()
    classifier_script = create_isolated_image_classifier()
    extractor_script = fix_word_image_extraction()

    print("\n🎯 Summary of fixes:")
    print(f"✅ Isolated OpenCLIP environment: {python_exe}")
    print(f"✅ Isolated classifier script: {classifier_script}")
    print(f"✅ Word image extractor: {extractor_script}")

    print("\n🚀 Next steps:")
    print(f"1. Test Word image extraction: python {extractor_script}")
    print("2. Update document_processor.py to use new extraction method")
    print("3. Update isolated_image_classifier.py to use the new isolated environment")
|
|
|
|
# Script entry point: run the full fix workflow when executed directly.
if __name__ == "__main__":
    main()