"""
Diagnose actual GPU usage during OpenCLIP classification
"""

import subprocess
import os
import time
def diagnose_gpu_usage():
    """Run a GPU-usage diagnostic for OpenCLIP inside the project virtualenv.

    Launches the venv's Python interpreter as a subprocess and executes an
    embedded script that loads a ViT-B-32 OpenCLIP model, moves the model and
    all tensors to CUDA when available, times a full image/text
    classification pass, and reports which device each tensor ended up on.

    Returns:
        bool: True when the subprocess output confirms every operation ran on
        the GPU; False when the venv is missing, the diagnostic times out,
        the output is inconclusive, or any operation fell back to the CPU.
    """
    print("🔍 DIAGNOSING ACTUAL GPU USAGE DURING OPENCLIP CLASSIFICATION")
    print("=" * 60)

    # Windows-style path into the project-local virtual environment.
    venv_python = "openclip_gpu_env\\Scripts\\python.exe"

    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return False

    # Script executed inside the venv so it sees that env's torch/open_clip.
    # ("\\n" escapes stay doubled here: they must reach the child as "\n".)
    test_script = """
import torch
import open_clip
from PIL import Image
import time
import tempfile
import os

print("=== GPU USAGE DIAGNOSTIC ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU 0: {torch.cuda.get_device_name(0)}")
    print(f"Current device: {torch.cuda.current_device()}")
else:
    print("❌ CUDA NOT AVAILABLE - USING CPU")

# Check if tensors are actually on GPU
print("\\n=== TENSOR DEVICE CHECK ===")
test_tensor = torch.randn(3, 224, 224)
print(f"Test tensor device (before): {test_tensor.device}")

if torch.cuda.is_available():
    test_tensor = test_tensor.cuda()
    print(f"Test tensor device (after .cuda()): {test_tensor.device}")
else:
    print("❌ Cannot move tensor to GPU")

# Load model and check device
print("\\n=== MODEL LOADING ===")
start_time = time.time()
model, _, processor = open_clip.create_model_and_transforms(
    model_name="ViT-B-32",
    pretrained="laion2b_s34b_b79k"
)
load_time = time.time() - start_time
print(f"Model load time: {load_time:.2f}s")

# Check model device
print(f"Model device (before move): {next(model.parameters()).device}")

if torch.cuda.is_available():
    model = model.cuda()
    print(f"Model device (after .cuda()): {next(model.parameters()).device}")
else:
    print("❌ Model remains on CPU")

# Create test image
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
    img_path = f.name

img = Image.new('RGB', (224, 224), color='red')
img.save(img_path)

# Process image and check device
print("\\n=== IMAGE PROCESSING ===")
image = processor(img).unsqueeze(0)
print(f"Image tensor device (before): {image.device}")

if torch.cuda.is_available():
    image = image.cuda()
    print(f"Image tensor device (after .cuda()): {image.device}")
else:
    print("❌ Image tensor remains on CPU")

# Test classification with timing
print("\\n=== CLASSIFICATION PERFORMANCE ===")
text_labels = ["a photo of a bee", "a photo of a document"]

# Encode text
text_tokens = open_clip.tokenize(text_labels)
print(f"Text tokens device (before): {text_tokens.device}")

if torch.cuda.is_available():
    text_tokens = text_tokens.cuda()
    print(f"Text tokens device (after .cuda()): {text_tokens.device}")

# Time the actual classification
print("\\n=== CLASSIFICATION TIMING ===")
classification_start = time.time()

with torch.no_grad():
    # Encode image
    image_encode_start = time.time()
    image_features = model.encode_image(image)
    image_encode_time = time.time() - image_encode_start
    print(f"Image encoding time: {image_encode_time:.2f}s")

    # Encode text
    text_encode_start = time.time()
    text_features = model.encode_text(text_tokens)
    text_encode_time = time.time() - text_encode_start
    print(f"Text encoding time: {text_encode_time:.2f}s")

    # Calculate similarity
    similarity_start = time.time()
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    similarity_time = time.time() - similarity_start
    print(f"Similarity calculation time: {similarity_time:.2f}s")

total_classification_time = time.time() - classification_start
print(f"Total classification time: {total_classification_time:.2f}s")

# Check if any operations fell back to CPU
print("\\n=== DEVICE CONSISTENCY CHECK ===")
print(f"Image features device: {image_features.device}")
print(f"Text features device: {text_features.device}")
print(f"Similarity tensor device: {similarity.device}")

# Clean up
os.unlink(img_path)

print("\\n=== DIAGNOSTIC SUMMARY ===")
if image_features.device.type == 'cuda' and text_features.device.type == 'cuda':
    print("✅ SUCCESS: All operations on GPU")
else:
    print("❌ WARNING: Some operations fell back to CPU")
    print(f"  Image features: {image_features.device}")
    print(f"  Text features: {text_features.device}")
"""

    print("Running GPU usage diagnostic...")
    try:
        # Model download / first CUDA init can be slow; cap at 60 s.
        result = subprocess.run(
            [venv_python, "-c", test_script],
            capture_output=True, text=True, timeout=60,
        )
    except subprocess.TimeoutExpired:
        # Fix: the original let TimeoutExpired propagate and crash the caller.
        print("❌ Diagnostic timed out after 60s")
        return False

    print("STDOUT:")
    print(result.stdout)

    if result.stderr:
        print("STDERR:")
        print(result.stderr)

    # Analyze the child's stdout rather than its exit code: the embedded
    # script always exits 0, so the marker strings are the real signal.
    if "All operations on GPU" in result.stdout:
        print("\n🎉 GPU USAGE CONFIRMED: OpenCLIP is using GPU")
        return True
    elif "Some operations fell back to CPU" in result.stdout:
        print("\n⚠️ GPU USAGE ISSUE: OpenCLIP is falling back to CPU")
        return False
    else:
        # Fix: '❓' replaces the mojibake '❨' in the original message.
        print("\n❓ DIAGNOSTIC INCONCLUSIVE")
        return False
def check_gpu_memory():
    """Print GPU memory statistics from inside the project virtualenv.

    Runs a short script with the venv's interpreter that reports allocated,
    reserved, total, and free CUDA memory (or a notice when CUDA is absent).

    Returns:
        None. Results are printed to stdout only.
    """
    print("\n🔍 CHECKING GPU MEMORY")
    print("=" * 60)

    venv_python = "openclip_gpu_env\\Scripts\\python.exe"

    # Fix: mirror diagnose_gpu_usage()'s guard so a missing venv no longer
    # raises FileNotFoundError out of subprocess.run.
    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return

    memory_script = """
import torch
if torch.cuda.is_available():
    print(f"GPU Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    print(f"GPU Memory reserved: {torch.cuda.memory_reserved() / 1024**2:.1f} MB")
    print(f"GPU Memory total: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print(f"GPU Memory free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved()) / 1024**3:.1f} GB")
else:
    print("❌ CUDA not available")
"""

    try:
        result = subprocess.run(
            [venv_python, "-c", memory_script],
            capture_output=True, text=True, timeout=10,
        )
    except subprocess.TimeoutExpired:
        # Fix: the original let TimeoutExpired propagate and crash the caller.
        print("❌ GPU memory check timed out after 10s")
        return

    print(result.stdout)
    if result.stderr:
        # Fix: surface child errors instead of silently dropping stderr.
        print(result.stderr)
if __name__ == "__main__":
    # Entry point: banner, quick memory snapshot, then the full diagnostic.
    print("🚀 OPENCLIP GPU USAGE DIAGNOSTIC")
    print("This will check if OpenCLIP is actually using GPU during classification")
    print()

    # Memory snapshot comes before the heavier device diagnostic.
    check_gpu_memory()

    is_gpu_ok = diagnose_gpu_usage()

    if not is_gpu_ok:
        # GPU path failed somewhere — list the usual suspects.
        print("\n❌ DIAGNOSTIC COMPLETE: OpenCLIP has GPU usage issues")
        print("Possible causes:")
        for cause in (
            "  - CUDA drivers not properly installed",
            "  - GPU memory issues",
            "  - PyTorch CUDA version mismatch",
            "  - Model/tensor device placement issues",
        ):
            print(cause)
    else:
        print("\n✅ DIAGNOSTIC COMPLETE: OpenCLIP is properly using GPU")