# railseek6/diagnose_gpu_usage.py
# NOTE: web-viewer header (file listing, size, "Raw Blame History", Unicode
# warning) converted to this comment. The emoji / non-ASCII characters used in
# the console output below are intentional.

"""
Diagnose actual GPU usage during OpenCLIP classification
"""
import subprocess
import os
import time
def diagnose_gpu_usage(venv_python: str = "openclip_gpu_env\\Scripts\\python.exe") -> bool:
    """Check whether OpenCLIP actually uses the GPU during classification.

    Runs a self-contained diagnostic script inside the OpenCLIP virtual
    environment (via ``python -c``) that loads a ViT-B-32 model, classifies a
    synthetic test image, and reports the device every tensor ends up on.

    Args:
        venv_python: Path to the virtual environment's Python interpreter.
            Defaults to the project's Windows-style venv location.

    Returns:
        True if the child script confirmed all operations ran on the GPU,
        False if the venv is missing, the script timed out, operations fell
        back to CPU, or the output was inconclusive.
    """
    print("🔍 DIAGNOSING ACTUAL GPU USAGE DURING OPENCLIP CLASSIFICATION")
    print("=" * 60)
    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return False

    # Child-process script: must be valid stand-alone Python. The \\n escapes
    # stay doubled here so the child prints real newlines.
    test_script = """
import torch
import open_clip
from PIL import Image
import time
import tempfile
import os

print("=== GPU USAGE DIAGNOSTIC ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU 0: {torch.cuda.get_device_name(0)}")
    print(f"Current device: {torch.cuda.current_device()}")
else:
    print("❌ CUDA NOT AVAILABLE - USING CPU")

# Check if tensors are actually on GPU
print("\\n=== TENSOR DEVICE CHECK ===")
test_tensor = torch.randn(3, 224, 224)
print(f"Test tensor device (before): {test_tensor.device}")
if torch.cuda.is_available():
    test_tensor = test_tensor.cuda()
    print(f"Test tensor device (after .cuda()): {test_tensor.device}")
else:
    print("❌ Cannot move tensor to GPU")

# Load model and check device
print("\\n=== MODEL LOADING ===")
start_time = time.time()
model, _, processor = open_clip.create_model_and_transforms(
    model_name="ViT-B-32",
    pretrained="laion2b_s34b_b79k"
)
load_time = time.time() - start_time
print(f"Model load time: {load_time:.2f}s")

# Check model device
print(f"Model device (before move): {next(model.parameters()).device}")
if torch.cuda.is_available():
    model = model.cuda()
    print(f"Model device (after .cuda()): {next(model.parameters()).device}")
else:
    print("❌ Model remains on CPU")

# Create test image
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
    img_path = f.name
img = Image.new('RGB', (224, 224), color='red')
img.save(img_path)

# Process image and check device
print("\\n=== IMAGE PROCESSING ===")
image = processor(img).unsqueeze(0)
print(f"Image tensor device (before): {image.device}")
if torch.cuda.is_available():
    image = image.cuda()
    print(f"Image tensor device (after .cuda()): {image.device}")
else:
    print("❌ Image tensor remains on CPU")

# Test classification with timing
print("\\n=== CLASSIFICATION PERFORMANCE ===")
text_labels = ["a photo of a bee", "a photo of a document"]

# Encode text
text_tokens = open_clip.tokenize(text_labels)
print(f"Text tokens device (before): {text_tokens.device}")
if torch.cuda.is_available():
    text_tokens = text_tokens.cuda()
    print(f"Text tokens device (after .cuda()): {text_tokens.device}")

# Time the actual classification
print("\\n=== CLASSIFICATION TIMING ===")
classification_start = time.time()
with torch.no_grad():
    # Encode image
    image_encode_start = time.time()
    image_features = model.encode_image(image)
    image_encode_time = time.time() - image_encode_start
    print(f"Image encoding time: {image_encode_time:.2f}s")

    # Encode text
    text_encode_start = time.time()
    text_features = model.encode_text(text_tokens)
    text_encode_time = time.time() - text_encode_start
    print(f"Text encoding time: {text_encode_time:.2f}s")

    # Calculate similarity
    similarity_start = time.time()
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    similarity_time = time.time() - similarity_start
    print(f"Similarity calculation time: {similarity_time:.2f}s")

total_classification_time = time.time() - classification_start
print(f"Total classification time: {total_classification_time:.2f}s")

# Check if any operations fell back to CPU
print("\\n=== DEVICE CONSISTENCY CHECK ===")
print(f"Image features device: {image_features.device}")
print(f"Text features device: {text_features.device}")
print(f"Similarity tensor device: {similarity.device}")

# Clean up
os.unlink(img_path)

print("\\n=== DIAGNOSTIC SUMMARY ===")
if image_features.device.type == 'cuda' and text_features.device.type == 'cuda':
    print("✅ SUCCESS: All operations on GPU")
else:
    print("❌ WARNING: Some operations fell back to CPU")
    print(f" Image features: {image_features.device}")
    print(f" Text features: {text_features.device}")
"""

    print("Running GPU usage diagnostic...")
    try:
        # Model download + inference can be slow; cap it so a hung child
        # process cannot stall the diagnostic forever.
        result = subprocess.run(
            [venv_python, "-c", test_script],
            capture_output=True, text=True, timeout=60,
        )
    except subprocess.TimeoutExpired:
        # Fix: previously an unhandled TimeoutExpired crashed the caller.
        print("❌ Diagnostic timed out after 60s")
        return False

    print("STDOUT:")
    print(result.stdout)
    if result.stderr:
        print("STDERR:")
        print(result.stderr)

    # Analyze results by scanning the child's summary markers.
    if "All operations on GPU" in result.stdout:
        print("\n🎉 GPU USAGE CONFIRMED: OpenCLIP is using GPU")
        return True
    elif "Some operations fell back to CPU" in result.stdout:
        print("\n⚠️ GPU USAGE ISSUE: OpenCLIP is falling back to CPU")
        return False
    else:
        print("\n DIAGNOSTIC INCONCLUSIVE")
        return False
def check_gpu_memory(venv_python: str = "openclip_gpu_env\\Scripts\\python.exe") -> None:
    """Print GPU memory usage/availability as seen from the OpenCLIP venv.

    Runs a small torch script inside the virtual environment and echoes its
    stdout. Purely informational; returns None.

    Args:
        venv_python: Path to the virtual environment's Python interpreter.
    """
    print("\n🔍 CHECKING GPU MEMORY")
    print("=" * 60)
    # Fix: diagnose_gpu_usage() guards against a missing venv but this
    # function previously did not, raising FileNotFoundError from subprocess.
    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return

    memory_script = """
import torch
if torch.cuda.is_available():
    print(f"GPU Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    print(f"GPU Memory reserved: {torch.cuda.memory_reserved() / 1024**2:.1f} MB")
    print(f"GPU Memory total: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print(f"GPU Memory free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved()) / 1024**3:.1f} GB")
else:
    print("❌ CUDA not available")
"""
    try:
        result = subprocess.run(
            [venv_python, "-c", memory_script],
            capture_output=True, text=True, timeout=10,
        )
    except subprocess.TimeoutExpired:
        # Fix: previously an unhandled TimeoutExpired crashed the caller.
        print("❌ GPU memory check timed out after 10s")
        return
    print(result.stdout)
def main() -> None:
    """Script entry point: memory check first, then the detailed diagnostic."""
    print("🚀 OPENCLIP GPU USAGE DIAGNOSTIC")
    print("This will check if OpenCLIP is actually using GPU during classification")
    print()

    # Check GPU memory first
    check_gpu_memory()

    # Run detailed diagnostic
    gpu_working = diagnose_gpu_usage()
    if gpu_working:
        print("\n✅ DIAGNOSTIC COMPLETE: OpenCLIP is properly using GPU")
    else:
        print("\n❌ DIAGNOSTIC COMPLETE: OpenCLIP has GPU usage issues")
        print("Possible causes:")
        print(" - CUDA drivers not properly installed")
        print(" - GPU memory issues")
        print(" - PyTorch CUDA version mismatch")
        print(" - Model/tensor device placement issues")


if __name__ == "__main__":
    main()