"""
Diagnose actual GPU usage during OpenCLIP classification
"""
import subprocess
import os
import time

# Script run by the virtual environment's interpreter (via ``python -c``).
# It prints the device of every tensor involved in one OpenCLIP
# classification and finishes with a summary line that the parent process
# searches for ("All operations on GPU" / "Some operations fell back to CPU").
_GPU_DIAGNOSTIC_SCRIPT = """
import time

import torch
import open_clip
from PIL import Image

use_cuda = torch.cuda.is_available()


def sync():
    # CUDA kernels are launched asynchronously; wait for them to finish so
    # the wall-clock timings measure the work and not just the launch.
    if use_cuda:
        torch.cuda.synchronize()


print("=== GPU USAGE DIAGNOSTIC ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {use_cuda}")
print(f"GPU device count: {torch.cuda.device_count()}")
if use_cuda:
    print(f"GPU 0: {torch.cuda.get_device_name(0)}")
    print(f"Current device: {torch.cuda.current_device()}")
else:
    print("❌ CUDA NOT AVAILABLE - USING CPU")

# Check if tensors are actually on GPU
print("\\n=== TENSOR DEVICE CHECK ===")
test_tensor = torch.randn(3, 224, 224)
print(f"Test tensor device (before): {test_tensor.device}")

if use_cuda:
    test_tensor = test_tensor.cuda()
    print(f"Test tensor device (after .cuda()): {test_tensor.device}")
else:
    print("❌ Cannot move tensor to GPU")

# Load model and check device
print("\\n=== MODEL LOADING ===")
start_time = time.perf_counter()
model, _, processor = open_clip.create_model_and_transforms(
    model_name="ViT-B-32",
    pretrained="laion2b_s34b_b79k"
)
load_time = time.perf_counter() - start_time
print(f"Model load time: {load_time:.2f}s")

# Check model device
print(f"Model device (before move): {next(model.parameters()).device}")

if use_cuda:
    model = model.cuda()
    print(f"Model device (after .cuda()): {next(model.parameters()).device}")
else:
    print("❌ Model remains on CPU")

# Create the test image in memory; the preprocessor takes a PIL image
# directly, so nothing needs to be written to disk.
img = Image.new('RGB', (224, 224), color='red')

# Process image and check device
print("\\n=== IMAGE PROCESSING ===")
image = processor(img).unsqueeze(0)
print(f"Image tensor device (before): {image.device}")

if use_cuda:
    image = image.cuda()
    print(f"Image tensor device (after .cuda()): {image.device}")
else:
    print("❌ Image tensor remains on CPU")

# Test classification with timing
print("\\n=== CLASSIFICATION PERFORMANCE ===")
text_labels = ["a photo of a bee", "a photo of a document"]

# Encode text
text_tokens = open_clip.tokenize(text_labels)
print(f"Text tokens device (before): {text_tokens.device}")

if use_cuda:
    text_tokens = text_tokens.cuda()
    print(f"Text tokens device (after .cuda()): {text_tokens.device}")

# Time the actual classification
print("\\n=== CLASSIFICATION TIMING ===")
sync()  # drain pending host-to-device copies before starting the clock
classification_start = time.perf_counter()

with torch.no_grad():
    # Encode image
    image_encode_start = time.perf_counter()
    image_features = model.encode_image(image)
    sync()
    image_encode_time = time.perf_counter() - image_encode_start
    print(f"Image encoding time: {image_encode_time:.2f}s")

    # Encode text
    text_encode_start = time.perf_counter()
    text_features = model.encode_text(text_tokens)
    sync()
    text_encode_time = time.perf_counter() - text_encode_start
    print(f"Text encoding time: {text_encode_time:.2f}s")

    # Calculate similarity
    similarity_start = time.perf_counter()
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    sync()
    similarity_time = time.perf_counter() - similarity_start
    print(f"Similarity calculation time: {similarity_time:.2f}s")

total_classification_time = time.perf_counter() - classification_start
print(f"Total classification time: {total_classification_time:.2f}s")

# Check if any operations fell back to CPU
print("\\n=== DEVICE CONSISTENCY CHECK ===")
print(f"Image features device: {image_features.device}")
print(f"Text features device: {text_features.device}")
print(f"Similarity tensor device: {similarity.device}")

print("\\n=== DIAGNOSTIC SUMMARY ===")
if image_features.device.type == 'cuda' and text_features.device.type == 'cuda':
    print("✅ SUCCESS: All operations on GPU")
else:
    print("❌ WARNING: Some operations fell back to CPU")
    print(f"   Image features: {image_features.device}")
    print(f"   Text features: {text_features.device}")
"""


def diagnose_gpu_usage(
    venv_python: str = "openclip_gpu_env\\Scripts\\python.exe",
    timeout: float = 60,
) -> bool:
    """Check if OpenCLIP is actually using GPU during classification.

    Runs ``_GPU_DIAGNOSTIC_SCRIPT`` with the given interpreter, echoes the
    child's stdout/stderr, and inspects the summary line it prints.

    Args:
        venv_python: Path to the Python interpreter of the environment that
            has torch and open_clip installed.
        timeout: Seconds to wait for the child process before giving up.

    Returns:
        True only when the child reports that the image and text features
        were computed on a CUDA device. False when the interpreter is
        missing, the child cannot be started, times out, exits with an
        error, or reports a CPU fallback.
    """
    print("🔍 DIAGNOSING ACTUAL GPU USAGE DURING OPENCLIP CLASSIFICATION")
    print("=" * 60)

    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return False

    # The child's stdout is a pipe, so without this it encodes output with
    # the locale code page; on Windows that typically cannot represent the
    # emoji markers and the child would die with UnicodeEncodeError before
    # printing its summary line.
    child_env = dict(os.environ, PYTHONIOENCODING="utf-8")

    print("Running GPU usage diagnostic...")
    try:
        result = subprocess.run(
            [venv_python, "-c", _GPU_DIAGNOSTIC_SCRIPT],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=child_env,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"\n❌ DIAGNOSTIC TIMED OUT after {timeout}s "
              "(the first run may still be downloading model weights)")
        return False
    except OSError as exc:
        print(f"\n❌ Could not start {venv_python}: {exc}")
        return False

    print("STDOUT:")
    print(result.stdout)

    if result.stderr:
        print("STDERR:")
        print(result.stderr)

    if result.returncode != 0:
        # The summary line is the script's last statement, so a failing
        # child cannot have produced a trustworthy verdict.
        print(f"\n❌ DIAGNOSTIC SCRIPT FAILED (exit code {result.returncode})")
        return False

    # Analyze results
    if "All operations on GPU" in result.stdout:
        print("\n🎉 GPU USAGE CONFIRMED: OpenCLIP is using GPU")
        return True
    if "Some operations fell back to CPU" in result.stdout:
        print("\n⚠️ GPU USAGE ISSUE: OpenCLIP is falling back to CPU")
        return False

    print("\n❓ DIAGNOSTIC INCONCLUSIVE")
    return False

def check_gpu_memory(
    venv_python: str = "openclip_gpu_env\\Scripts\\python.exe",
    timeout: float = 10,
) -> None:
    """Check GPU memory usage and availability.

    Runs a short torch script with the given interpreter and prints what it
    reports. Problems (missing interpreter, launch failure, timeout, child
    error output) are reported on stdout instead of raising, so a caller can
    continue with further diagnostics.

    Args:
        venv_python: Path to the Python interpreter of the environment that
            has torch installed.
        timeout: Seconds to wait for the child process before giving up.
    """
    print("\n🔍 CHECKING GPU MEMORY")
    print("=" * 60)

    if not os.path.exists(venv_python):
        print("❌ Virtual environment not found")
        return

    # "allocated"/"reserved" describe only the child process's own caching
    # allocator. Device-wide free memory comes from mem_get_info() where the
    # installed torch provides it; otherwise fall back to total - reserved.
    memory_script = """
import torch
if torch.cuda.is_available():
    total = torch.cuda.get_device_properties(0).total_memory
    if hasattr(torch.cuda, "mem_get_info"):
        free, _ = torch.cuda.mem_get_info(0)
    else:
        free = total - torch.cuda.memory_reserved()
    print(f"GPU Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    print(f"GPU Memory reserved: {torch.cuda.memory_reserved() / 1024**2:.1f} MB")
    print(f"GPU Memory total: {total / 1024**3:.1f} GB")
    print(f"GPU Memory free: {free / 1024**3:.1f} GB")
else:
    print("❌ CUDA not available")
"""

    # Force UTF-8 on the child's piped stdout; with the locale code page
    # (typical on Windows) the emoji marker above may not be encodable.
    child_env = dict(os.environ, PYTHONIOENCODING="utf-8")

    try:
        result = subprocess.run(
            [venv_python, "-c", memory_script],
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=child_env,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"❌ GPU memory check timed out after {timeout}s")
        return
    except OSError as exc:
        print(f"❌ Could not start {venv_python}: {exc}")
        return

    print(result.stdout)

    # Surface import errors and similar failures instead of printing nothing.
    if result.stderr:
        print("STDERR:")
        print(result.stderr)

if __name__ == "__main__":
    # Banner shown before any check runs (the empty entry is a blank line).
    for banner_line in (
        "🚀 OPENCLIP GPU USAGE DIAGNOSTIC",
        "This will check if OpenCLIP is actually using GPU during classification",
        "",
    ):
        print(banner_line)

    # Memory overview first, then the detailed classification diagnostic.
    check_gpu_memory()
    diagnostic_passed = diagnose_gpu_usage()

    if not diagnostic_passed:
        # Failure: list the usual suspects for the user to investigate.
        failure_report = (
            "\n❌ DIAGNOSTIC COMPLETE: OpenCLIP has GPU usage issues",
            "Possible causes:",
            "  - CUDA drivers not properly installed",
            "  - GPU memory issues",
            "  - PyTorch CUDA version mismatch",
            "  - Model/tensor device placement issues",
        )
        for report_line in failure_report:
            print(report_line)
    else:
        print("\n✅ DIAGNOSTIC COMPLETE: OpenCLIP is properly using GPU")