"""Diagnose actual GPU usage during OpenCLIP classification.

The checks run inside the ``openclip_gpu_env`` virtual environment: a small
script is passed to that environment's interpreter with ``python -c`` and its
captured output is printed and scanned for a success/failure marker.
"""

import os
import subprocess
import time
from typing import Optional

# Interpreter of the virtual environment that is expected to provide torch and
# open_clip. The path is relative to the current working directory and uses
# the Windows venv layout.
VENV_PYTHON = "openclip_gpu_env\\Scripts\\python.exe"

# Script executed in the virtual environment by diagnose_gpu_usage().
# "\\n" is deliberate: the child source must contain the two characters "\n".
_GPU_DIAGNOSTIC_SCRIPT = """
import torch
import open_clip
from PIL import Image
import time
import tempfile
import os


def sync():
    # CUDA kernels are launched asynchronously; wait for them to finish so
    # that the wall-clock timings below measure the work, not the launch.
    if torch.cuda.is_available():
        torch.cuda.synchronize()


print("=== GPU USAGE DIAGNOSTIC ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    print(f"GPU 0: {torch.cuda.get_device_name(0)}")
    print(f"Current device: {torch.cuda.current_device()}")
else:
    print("āŒ CUDA NOT AVAILABLE - USING CPU")

# Check if tensors are actually on GPU
print("\\n=== TENSOR DEVICE CHECK ===")
test_tensor = torch.randn(3, 224, 224)
print(f"Test tensor device (before): {test_tensor.device}")

if torch.cuda.is_available():
    test_tensor = test_tensor.cuda()
    print(f"Test tensor device (after .cuda()): {test_tensor.device}")
else:
    print("āŒ Cannot move tensor to GPU")

# Load model and check device
print("\\n=== MODEL LOADING ===")
start_time = time.time()

model, _, processor = open_clip.create_model_and_transforms(
    model_name="ViT-B-32",
    pretrained="laion2b_s34b_b79k"
)

load_time = time.time() - start_time
print(f"Model load time: {load_time:.2f}s")

# Check model device
print(f"Model device (before move): {next(model.parameters()).device}")

if torch.cuda.is_available():
    model = model.cuda()
    print(f"Model device (after .cuda()): {next(model.parameters()).device}")
else:
    print("āŒ Model remains on CPU")

# Create test image. The file on disk is never read back (the in-memory image
# is what gets classified), so remove it immediately instead of at the very
# end of the script, where any earlier failure would leak it.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f:
    img_path = f.name
img = Image.new('RGB', (224, 224), color='red')
try:
    img.save(img_path)
finally:
    os.unlink(img_path)

# Process image and check device
print("\\n=== IMAGE PROCESSING ===")
image = processor(img).unsqueeze(0)
print(f"Image tensor device (before): {image.device}")

if torch.cuda.is_available():
    image = image.cuda()
    print(f"Image tensor device (after .cuda()): {image.device}")
else:
    print("āŒ Image tensor remains on CPU")

# Test classification with timing
print("\\n=== CLASSIFICATION PERFORMANCE ===")
text_labels = ["a photo of a bee", "a photo of a document"]

# Encode text
text_tokens = open_clip.tokenize(text_labels)
print(f"Text tokens device (before): {text_tokens.device}")

if torch.cuda.is_available():
    text_tokens = text_tokens.cuda()
    print(f"Text tokens device (after .cuda()): {text_tokens.device}")

# Time the actual classification
print("\\n=== CLASSIFICATION TIMING ===")
sync()
classification_start = time.time()

with torch.no_grad():
    # Encode image
    image_encode_start = time.time()
    image_features = model.encode_image(image)
    sync()
    image_encode_time = time.time() - image_encode_start
    print(f"Image encoding time: {image_encode_time:.2f}s")

    # Encode text
    text_encode_start = time.time()
    text_features = model.encode_text(text_tokens)
    sync()
    text_encode_time = time.time() - text_encode_start
    print(f"Text encoding time: {text_encode_time:.2f}s")

    # Calculate similarity
    similarity_start = time.time()
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    sync()
    similarity_time = time.time() - similarity_start
    print(f"Similarity calculation time: {similarity_time:.2f}s")

total_classification_time = time.time() - classification_start
print(f"Total classification time: {total_classification_time:.2f}s")

# Check if any operations fell back to CPU
print("\\n=== DEVICE CONSISTENCY CHECK ===")
print(f"Image features device: {image_features.device}")
print(f"Text features device: {text_features.device}")
print(f"Similarity tensor device: {similarity.device}")

print("\\n=== DIAGNOSTIC SUMMARY ===")
if image_features.device.type == 'cuda' and text_features.device.type == 'cuda':
    print("āœ… SUCCESS: All operations on GPU")
else:
    print("āŒ WARNING: Some operations fell back to CPU")
    print(f" Image features: {image_features.device}")
    print(f" Text features: {text_features.device}")
"""

# Script executed in the virtual environment by check_gpu_memory().
# The "free" figure is total device memory minus what this (fresh) process
# has reserved, not the amount other processes leave available.
_GPU_MEMORY_SCRIPT = """
import torch
if torch.cuda.is_available():
    print(f"GPU Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    print(f"GPU Memory reserved: {torch.cuda.memory_reserved() / 1024**2:.1f} MB")
    print(f"GPU Memory total: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print(f"GPU Memory free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved()) / 1024**3:.1f} GB")
else:
    print("āŒ CUDA not available")
"""


def _run_in_venv(script: str, timeout: float) -> Optional[subprocess.CompletedProcess]:
    """Run *script* with the virtual environment's interpreter.

    Returns the completed process, or ``None`` (after printing the reason)
    when the interpreter cannot be started or the timeout expires.
    """
    # The scripts print non-ASCII status glyphs. When stdout is a pipe the
    # child would otherwise use the locale code page and may fail with
    # UnicodeEncodeError before it reaches the summary line.
    child_env = dict(os.environ, PYTHONIOENCODING="utf-8")
    try:
        return subprocess.run(
            [VENV_PYTHON, "-c", script],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
            env=child_env,
        )
    except subprocess.TimeoutExpired:
        print(f"Subprocess timed out after {timeout} seconds")
        return None
    except OSError as exc:
        print(f"Could not start {VENV_PYTHON}: {exc}")
        return None


def diagnose_gpu_usage(timeout: float = 60) -> bool:
    """Check if OpenCLIP is actually using GPU during classification.

    Args:
        timeout: Seconds to wait for the diagnostic subprocess. Increase it
            when the pretrained weights still have to be downloaded.

    Returns:
        True only when the subprocess reports that image and text features
        were computed on a CUDA device. False when the virtual environment is
        missing, the subprocess fails or times out, or a CPU fallback is
        reported.
    """
    print("šŸ” DIAGNOSING ACTUAL GPU USAGE DURING OPENCLIP CLASSIFICATION")
    print("=" * 60)

    if not os.path.exists(VENV_PYTHON):
        print("āŒ Virtual environment not found")
        return False

    print("Running GPU usage diagnostic...")
    result = _run_in_venv(_GPU_DIAGNOSTIC_SCRIPT, timeout)
    if result is None:
        return False

    print("STDOUT:")
    print(result.stdout)
    if result.stderr:
        print("STDERR:")
        print(result.stderr)

    # Analyze results by looking for the marker lines the script prints.
    if "All operations on GPU" in result.stdout:
        print("\nšŸŽ‰ GPU USAGE CONFIRMED: OpenCLIP is using GPU")
        return True
    if "Some operations fell back to CPU" in result.stdout:
        print("\nāš ļø GPU USAGE ISSUE: OpenCLIP is falling back to CPU")
        return False

    # Neither marker was printed: the script most likely crashed part-way.
    print("\nāØ DIAGNOSTIC INCONCLUSIVE")
    return False


def check_gpu_memory(timeout: float = 10) -> None:
    """Check GPU memory usage and availability.

    Prints the memory figures reported by torch inside the virtual
    environment. A missing environment, a start-up failure or a timeout is
    reported on stdout instead of raising.

    Args:
        timeout: Seconds to wait for the subprocess.
    """
    print("\nšŸ” CHECKING GPU MEMORY")
    print("=" * 60)

    if not os.path.exists(VENV_PYTHON):
        print("āŒ Virtual environment not found")
        return

    result = _run_in_venv(_GPU_MEMORY_SCRIPT, timeout)
    if result is None:
        return

    print(result.stdout)
    if result.stderr:
        # Surface import errors and tracebacks instead of silently dropping them.
        print("STDERR:")
        print(result.stderr)


def main() -> None:
    """Run the memory check followed by the detailed GPU diagnostic."""
    print("šŸš€ OPENCLIP GPU USAGE DIAGNOSTIC")
    print("This will check if OpenCLIP is actually using GPU during classification")
    print()

    # Check GPU memory first
    check_gpu_memory()

    # Run detailed diagnostic
    gpu_working = diagnose_gpu_usage()

    if gpu_working:
        print("\nāœ… DIAGNOSTIC COMPLETE: OpenCLIP is properly using GPU")
    else:
        print("\nāŒ DIAGNOSTIC COMPLETE: OpenCLIP has GPU usage issues")
        print("Possible causes:")
        print(" - CUDA drivers not properly installed")
        print(" - GPU memory issues")
        print(" - PyTorch CUDA version mismatch")
        print(" - Model/tensor device placement issues")


if __name__ == "__main__":
    main()