# railseek6/debug_openclip_gpu.py
"""
Debug OpenCLIP GPU Issues with Complete Dependency Isolation
"""
import os
import sys
import subprocess
import tempfile
def debug_openclip_gpu(venv_python="openclip_gpu_env\\Scripts\\python.exe"):
    """Debug OpenCLIP GPU issues step by step.

    Runs a sequence of diagnostic scripts inside the isolated virtual
    environment: interpreter sanity, PyTorch/CUDA availability, OpenCLIP
    import, model loading, and finally a full image-classification
    pipeline on a real image (extracted from test.docx when available,
    otherwise a synthetic one).

    Args:
        venv_python: Path to the virtual environment's Python interpreter.
            The default matches the ``openclip_gpu_env`` layout used by
            the companion setup scripts (Windows path).

    Returns:
        True if every diagnostic step succeeded, False otherwise.
    """
    print("DEBUGGING OPENCLIP GPU ISSUES")
    print("=" * 50)

    # Step 1: the isolated venv must exist before anything else can run.
    if not os.path.exists(venv_python):
        print("ERROR: Virtual environment not found: openclip_gpu_env")
        return False
    print("OK: Virtual environment found")

    def run_in_venv(script, args=(), timeout=None):
        """Run `script` with the venv interpreter, capturing text output."""
        return subprocess.run(
            [venv_python, "-c", script, *args],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    # Step 2: basic interpreter sanity inside the venv.
    print("\nSTEP 1: BASIC PYTHON TEST")
    basic_script = """
import sys
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
"""
    result = run_in_venv(basic_script)
    print(result.stdout)
    if result.returncode != 0:
        print(f"ERROR: Basic Python test failed: {result.stderr}")
        return False

    # Step 3: PyTorch import, CUDA availability, and a GPU tensor round-trip.
    print("\nSTEP 2: PYTORCH & CUDA TEST")
    torch_script = """
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    # Test tensor operations on GPU
    x = torch.randn(3, 3).cuda()
    print(f"GPU tensor test: {x.shape} on {x.device}")
else:
    print("ERROR: CUDA not available in PyTorch")
"""
    result = run_in_venv(torch_script)
    print(result.stdout)
    if result.returncode != 0:
        print(f"ERROR: PyTorch test failed: {result.stderr}")
        return False

    # Step 4: OpenCLIP import.
    print("\nSTEP 3: OPENCLIP INSTALLATION TEST")
    import_script = """
try:
    import open_clip
    print("OK: OpenCLIP imported successfully")
    print(f"OpenCLIP version: {open_clip.__version__}")
except Exception as e:
    print(f"ERROR: OpenCLIP import failed: {e}")
    import traceback
    traceback.print_exc()
"""
    result = run_in_venv(import_script)
    print(result.stdout)
    if "imported successfully" not in result.stdout:
        print(f"ERROR: OpenCLIP installation test failed: {result.stderr}")
        return False

    # Step 5: model creation plus GPU placement.
    print("\nSTEP 4: OPENCLIP MODEL LOADING TEST")
    model_script = """
import open_clip
import torch
try:
    print("Loading OpenCLIP model...")
    model, _, preprocess = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k"
    )
    print("OK: Model loaded successfully")
    if torch.cuda.is_available():
        model = model.cuda()
        print("OK: Model moved to GPU")
    else:
        print("ERROR: Model running on CPU")
except Exception as e:
    print(f"ERROR: Model loading failed: {e}")
    import traceback
    traceback.print_exc()
"""
    result = run_in_venv(model_script)
    print(result.stdout)
    if "Model loaded successfully" not in result.stdout:
        print(f"ERROR: OpenCLIP model loading test failed: {result.stderr}")
        return False

    # Step 6: full classification pipeline on a real image.
    print("\nSTEP 5: FULL CLASSIFICATION PIPELINE TEST")

    # Prefer an image embedded in test.docx (a .docx is a zip; pictures
    # live under word/media/). Fall back to a synthetic solid-color image.
    test_images = []
    try:
        import zipfile
        with zipfile.ZipFile("test.docx", "r") as zip_ref:
            for file_info in zip_ref.filelist:
                if file_info.filename.startswith("word/media/"):
                    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
                        with zip_ref.open(file_info.filename) as source:
                            temp_file.write(source.read())
                        test_images.append(temp_file.name)
                    break
    except Exception:
        print("WARNING: Could not extract test image from test.docx")

    if not test_images:
        # Synthetic fallback: a 100x100 solid red PNG.
        from PIL import Image
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            fallback_path = f.name
        Image.new("RGB", (100, 100), color="red").save(fallback_path)
        test_images = [fallback_path]

    test_image = test_images[0]
    print(f"Testing with image: {test_image}")

    # NOTE: the image path is passed via argv rather than interpolated into
    # the script text — interpolation would both require escaping every
    # literal brace and break on Windows paths containing sequences like
    # "\U" inside a non-raw string literal.
    classification_script = """
import sys
import open_clip
import torch
from PIL import Image
try:
    # Load model
    model, _, processor = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k"
    )
    print("Model loaded, checking CUDA...")
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
        print("Model on GPU")
    else:
        print("Model on CPU")
    # Load and process image (path supplied as the first argument)
    image_path = sys.argv[1]
    print(f"Loading image: {image_path}")
    image = Image.open(image_path).convert("RGB")
    image_tensor = processor(image).unsqueeze(0)
    if use_cuda:
        image_tensor = image_tensor.cuda()
        print("Image tensor on GPU")
    # Get predictions
    with torch.no_grad():
        print("Encoding image...")
        image_features = model.encode_image(image_tensor)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        # Test labels
        text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a person",
            "a photo of a document", "a photo of a chart"
        ]
        print("Encoding text...")
        text_tokens = open_clip.tokenize(text_labels)
        if use_cuda:
            text_tokens = text_tokens.cuda()
        text_features = model.encode_text(text_tokens)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        print("Calculating similarity...")
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        values, indices = similarity[0].topk(3)
        print("RESULTS:")
        for val, idx in zip(values, indices):
            print(f" {text_labels[idx]}: {float(val):.4f}")
except Exception as e:
    print(f"ERROR: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
"""
    result = None
    try:
        result = run_in_venv(classification_script, args=(test_image,), timeout=60)
    except subprocess.TimeoutExpired:
        print("ERROR: classification script timed out after 60 seconds")
    finally:
        # Always clean up the extracted/synthetic temp images.
        for img_path in test_images:
            if os.path.exists(img_path):
                os.unlink(img_path)

    if result is not None:
        print("STDOUT:")
        print(result.stdout)
        if result.stderr:
            print("STDERR:")
            print(result.stderr)
        if result.returncode == 0 and "RESULTS:" in result.stdout:
            print("OK: OpenCLIP GPU classification working!")
            return True

    print("ERROR: OpenCLIP GPU classification failed")
    return False
def fix_openclip_issues(venv_python="openclip_gpu_env\\Scripts\\python.exe"):
    """Attempt to repair common OpenCLIP GPU problems in the venv.

    Pins NumPy to 1.24.3 and force-reinstalls open-clip-torch 2.20.0.
    NOTE(review): these pins are presumably known-good combinations for
    the venv's PyTorch build — confirm against the setup scripts.

    Args:
        venv_python: Path to the virtual environment's Python interpreter
            whose packages should be fixed (defaults to the standard
            ``openclip_gpu_env`` layout, matching ``debug_openclip_gpu``).

    Returns:
        True if both pip operations succeeded, False otherwise.
    """
    print("\nATTEMPTING TO FIX OPENCLIP GPU ISSUES")
    print("=" * 50)

    # Downgrade NumPy first so the OpenCLIP reinstall resolves against it.
    print("Downgrading NumPy to compatible version...")
    result = subprocess.run(
        [venv_python, "-m", "pip", "install", "numpy==1.24.3"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"ERROR: NumPy downgrade failed: {result.stderr}")
        return False
    print("OK: NumPy downgraded to 1.24.3")

    # Best-effort uninstall: failure is fine if the package isn't present,
    # so the return code is deliberately ignored.
    print("Reinstalling OpenCLIP...")
    subprocess.run(
        [venv_python, "-m", "pip", "uninstall", "-y", "open-clip-torch"],
        capture_output=True,
    )
    result = subprocess.run(
        [venv_python, "-m", "pip", "install", "open-clip-torch==2.20.0"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"ERROR: OpenCLIP reinstall failed: {result.stderr}")
        return False
    print("OK: OpenCLIP reinstalled")
    return True
if __name__ == "__main__":
print("OPENCLIP GPU DEBUGGER")
print("This will identify and fix OpenCLIP GPU issues")
print()
# First try to debug
success = debug_openclip_gpu()
if not success:
print("\nAttempting to fix issues...")
if fix_openclip_issues():
print("\nRetesting after fix...")
success = debug_openclip_gpu()
if success:
print("\nOPENCLIP GPU DEBUGGING COMPLETE - SUCCESS!")
else:
print("\nOPENCLIP GPU DEBUGGING FAILED")
print("Please check:")
print(" - CUDA drivers are installed")
print(" - Virtual environment has GPU-enabled PyTorch")
print(" - OpenCLIP is compatible with PyTorch version")