# railseek6/debug_openclip_gpu.py
"""
Debug OpenCLIP GPU Issues with Complete Dependency Isolation
"""
import os
import sys
import subprocess
import tempfile
def debug_openclip_gpu(venv_python="openclip_gpu_env\\Scripts\\python.exe"):
    """Debug OpenCLIP GPU issues step by step.

    Runs a sequence of diagnostic scripts inside the isolated virtual
    environment: interpreter sanity, PyTorch/CUDA availability, OpenCLIP
    import, model loading, and finally a full image-classification
    pipeline on a real image (extracted from test.docx when available,
    otherwise a synthetic one).

    Args:
        venv_python: Path to the virtual environment's Python interpreter.
            The default matches the ``openclip_gpu_env`` layout used by
            the companion setup scripts (Windows path).

    Returns:
        True if every diagnostic step succeeded, False otherwise.
    """
    print("DEBUGGING OPENCLIP GPU ISSUES")
    print("=" * 50)

    # Step 1: the isolated venv must exist before anything else can run.
    if not os.path.exists(venv_python):
        print("ERROR: Virtual environment not found: openclip_gpu_env")
        return False
    print("OK: Virtual environment found")

    def run_in_venv(script, args=(), timeout=None):
        """Run `script` with the venv interpreter, capturing text output."""
        return subprocess.run(
            [venv_python, "-c", script, *args],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    # Step 2: basic interpreter sanity inside the venv.
    print("\nSTEP 1: BASIC PYTHON TEST")
    basic_script = """
import sys
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
"""
    result = run_in_venv(basic_script)
    print(result.stdout)
    if result.returncode != 0:
        print(f"ERROR: Basic Python test failed: {result.stderr}")
        return False

    # Step 3: PyTorch import, CUDA availability, and a GPU tensor round-trip.
    print("\nSTEP 2: PYTORCH & CUDA TEST")
    torch_script = """
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    # Test tensor operations on GPU
    x = torch.randn(3, 3).cuda()
    print(f"GPU tensor test: {x.shape} on {x.device}")
else:
    print("ERROR: CUDA not available in PyTorch")
"""
    result = run_in_venv(torch_script)
    print(result.stdout)
    if result.returncode != 0:
        print(f"ERROR: PyTorch test failed: {result.stderr}")
        return False

    # Step 4: OpenCLIP import.
    print("\nSTEP 3: OPENCLIP INSTALLATION TEST")
    import_script = """
try:
    import open_clip
    print("OK: OpenCLIP imported successfully")
    print(f"OpenCLIP version: {open_clip.__version__}")
except Exception as e:
    print(f"ERROR: OpenCLIP import failed: {e}")
    import traceback
    traceback.print_exc()
"""
    result = run_in_venv(import_script)
    print(result.stdout)
    if "imported successfully" not in result.stdout:
        print(f"ERROR: OpenCLIP installation test failed: {result.stderr}")
        return False

    # Step 5: model creation plus GPU placement.
    print("\nSTEP 4: OPENCLIP MODEL LOADING TEST")
    model_script = """
import open_clip
import torch
try:
    print("Loading OpenCLIP model...")
    model, _, preprocess = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k"
    )
    print("OK: Model loaded successfully")
    if torch.cuda.is_available():
        model = model.cuda()
        print("OK: Model moved to GPU")
    else:
        print("ERROR: Model running on CPU")
except Exception as e:
    print(f"ERROR: Model loading failed: {e}")
    import traceback
    traceback.print_exc()
"""
    result = run_in_venv(model_script)
    print(result.stdout)
    if "Model loaded successfully" not in result.stdout:
        print(f"ERROR: OpenCLIP model loading test failed: {result.stderr}")
        return False

    # Step 6: full classification pipeline on a real image.
    print("\nSTEP 5: FULL CLASSIFICATION PIPELINE TEST")

    # Prefer an image embedded in test.docx (a .docx is a zip; pictures
    # live under word/media/). Fall back to a synthetic solid-color image.
    test_images = []
    try:
        import zipfile
        with zipfile.ZipFile("test.docx", "r") as zip_ref:
            for file_info in zip_ref.filelist:
                if file_info.filename.startswith("word/media/"):
                    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
                        with zip_ref.open(file_info.filename) as source:
                            temp_file.write(source.read())
                        test_images.append(temp_file.name)
                    break
    except Exception:
        print("WARNING: Could not extract test image from test.docx")

    if not test_images:
        # Synthetic fallback: a 100x100 solid red PNG.
        from PIL import Image
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            fallback_path = f.name
        Image.new("RGB", (100, 100), color="red").save(fallback_path)
        test_images = [fallback_path]

    test_image = test_images[0]
    print(f"Testing with image: {test_image}")

    # NOTE: the image path is passed via argv rather than interpolated into
    # the script text — interpolation would both require escaping every
    # literal brace and break on Windows paths containing sequences like
    # "\U" inside a non-raw string literal.
    classification_script = """
import sys
import open_clip
import torch
from PIL import Image
try:
    # Load model
    model, _, processor = open_clip.create_model_and_transforms(
        model_name="ViT-B-32",
        pretrained="laion2b_s34b_b79k"
    )
    print("Model loaded, checking CUDA...")
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
        print("Model on GPU")
    else:
        print("Model on CPU")
    # Load and process image (path supplied as the first argument)
    image_path = sys.argv[1]
    print(f"Loading image: {image_path}")
    image = Image.open(image_path).convert("RGB")
    image_tensor = processor(image).unsqueeze(0)
    if use_cuda:
        image_tensor = image_tensor.cuda()
        print("Image tensor on GPU")
    # Get predictions
    with torch.no_grad():
        print("Encoding image...")
        image_features = model.encode_image(image_tensor)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        # Test labels
        text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a person",
            "a photo of a document", "a photo of a chart"
        ]
        print("Encoding text...")
        text_tokens = open_clip.tokenize(text_labels)
        if use_cuda:
            text_tokens = text_tokens.cuda()
        text_features = model.encode_text(text_tokens)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        print("Calculating similarity...")
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        values, indices = similarity[0].topk(3)
        print("RESULTS:")
        for val, idx in zip(values, indices):
            print(f" {text_labels[idx]}: {float(val):.4f}")
except Exception as e:
    print(f"ERROR: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
"""
    result = None
    try:
        result = run_in_venv(classification_script, args=(test_image,), timeout=60)
    except subprocess.TimeoutExpired:
        print("ERROR: classification script timed out after 60 seconds")
    finally:
        # Always clean up the extracted/synthetic temp images.
        for img_path in test_images:
            if os.path.exists(img_path):
                os.unlink(img_path)

    if result is not None:
        print("STDOUT:")
        print(result.stdout)
        if result.stderr:
            print("STDERR:")
            print(result.stderr)
        if result.returncode == 0 and "RESULTS:" in result.stdout:
            print("OK: OpenCLIP GPU classification working!")
            return True

    print("ERROR: OpenCLIP GPU classification failed")
    return False
def fix_openclip_issues(venv_python="openclip_gpu_env\\Scripts\\python.exe"):
    """Attempt to repair common OpenCLIP GPU problems in the venv.

    Pins NumPy to 1.24.3 and force-reinstalls open-clip-torch 2.20.0.
    NOTE(review): these pins are presumably known-good combinations for
    the venv's PyTorch build — confirm against the setup scripts.

    Args:
        venv_python: Path to the virtual environment's Python interpreter
            whose packages should be fixed (defaults to the standard
            ``openclip_gpu_env`` layout, matching ``debug_openclip_gpu``).

    Returns:
        True if both pip operations succeeded, False otherwise.
    """
    print("\nATTEMPTING TO FIX OPENCLIP GPU ISSUES")
    print("=" * 50)

    # Downgrade NumPy first so the OpenCLIP reinstall resolves against it.
    print("Downgrading NumPy to compatible version...")
    result = subprocess.run(
        [venv_python, "-m", "pip", "install", "numpy==1.24.3"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"ERROR: NumPy downgrade failed: {result.stderr}")
        return False
    print("OK: NumPy downgraded to 1.24.3")

    # Best-effort uninstall: failure is fine if the package isn't present,
    # so the return code is deliberately ignored.
    print("Reinstalling OpenCLIP...")
    subprocess.run(
        [venv_python, "-m", "pip", "uninstall", "-y", "open-clip-torch"],
        capture_output=True,
    )
    result = subprocess.run(
        [venv_python, "-m", "pip", "install", "open-clip-torch==2.20.0"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"ERROR: OpenCLIP reinstall failed: {result.stderr}")
        return False
    print("OK: OpenCLIP reinstalled")
    return True
if __name__ == "__main__":
print("OPENCLIP GPU DEBUGGER")
print("This will identify and fix OpenCLIP GPU issues")
print()
# First try to debug
success = debug_openclip_gpu()
if not success:
print("\nAttempting to fix issues...")
if fix_openclip_issues():
print("\nRetesting after fix...")
success = debug_openclip_gpu()
if success:
print("\nOPENCLIP GPU DEBUGGING COMPLETE - SUCCESS!")
else:
print("\nOPENCLIP GPU DEBUGGING FAILED")
print("Please check:")
print(" - CUDA drivers are installed")
print(" - Virtual environment has GPU-enabled PyTorch")
print(" - OpenCLIP is compatible with PyTorch version")