"""Diagnostic script for GPU mode failures: checks CUDA, cuDNN, PyTorch and PaddlePaddle."""
import os
|
|
import sys
|
|
import subprocess
|
|
import torch
|
|
import platform
|
|
|
|
def check_cuda_installation():
    """Check the local CUDA toolkit installation.

    Reads the CUDA_PATH environment variable (set by the Windows CUDA
    installer), verifies the directory exists, and runs ``nvcc --version``
    to report the installed CUDA release.  Findings are printed; nothing
    is returned.
    """
    print("=== CUDA INSTALLATION CHECK ===")

    cuda_path = os.environ.get('CUDA_PATH')
    print(f"CUDA_PATH: {cuda_path}")

    if cuda_path and os.path.exists(cuda_path):
        print("✓ CUDA_PATH exists")

        try:
            nvcc_path = os.path.join(cuda_path, 'bin', 'nvcc.exe')
            if os.path.exists(nvcc_path):
                # Timeout guards against a wedged driver hanging the diagnostic.
                result = subprocess.run([nvcc_path, '--version'],
                                        capture_output=True, text=True, timeout=30)
                if result.returncode == 0:
                    print(f"✓ NVIDIA CUDA Compiler found")
                    for line in result.stdout.split('\n'):
                        if 'release' in line.lower():
                            print(f" {line.strip()}")
                else:
                    # BUGFIX: a failing nvcc run previously produced no output at all.
                    print(f"✗ nvcc exited with code {result.returncode}")
            else:
                print("✗ nvcc.exe not found in CUDA_PATH/bin")
        except Exception as e:
            print(f"✗ Error checking nvcc: {e}")
    else:
        print("✗ CUDA_PATH not set or invalid")
|
def check_cudnn_installation():
    """Look for a cuDNN DLL in the usual install locations and report it."""
    print("\n=== cuDNN INSTALLATION CHECK ===")

    cuda_path = os.environ.get('CUDA_PATH')
    if not cuda_path:
        print("✗ CUDA_PATH not set, cannot check cuDNN")
        return

    # DLL names shipped by cuDNN 8.x / 7.x inside the toolkit's bin dir,
    # plus the archive-style standalone install location.
    candidates = [
        os.path.join(cuda_path, 'bin', dll)
        for dll in ('cudnn64_8.dll', 'cudnn64_7.dll', 'cudnn64.dll')
    ]
    candidates.append(
        r'C:\Program Files\NVIDIA\cudnn-windows-x86_64-8.9.7.29_cuda12-archive\bin\cudnn64_8.dll'
    )

    hit = next((p for p in candidates if os.path.exists(p)), None)
    if hit is None:
        print("✗ No cuDNN DLL found in common locations")
        return

    print(f"✓ cuDNN found: {hit}")
    # The DLL's folder must also be on PATH for runtime loading to succeed.
    if os.path.dirname(hit) in os.environ.get('PATH', ''):
        print(f" ✓ cuDNN directory in PATH")
    else:
        print(f" ✗ cuDNN directory NOT in PATH")
|
def check_pytorch_gpu():
    """Report PyTorch's CUDA support and enumerate visible GPUs."""
    print("\n=== PYTORCH GPU CHECK ===")
    try:
        print(f"PyTorch version: {torch.__version__}")
        print(f"CUDA available: {torch.cuda.is_available()}")

        if not torch.cuda.is_available():
            print("✗ CUDA not available in PyTorch")
            return

        print(f"CUDA version: {torch.version.cuda}")
        gpu_count = torch.cuda.device_count()
        print(f"GPU device count: {gpu_count}")

        for idx in range(gpu_count):
            total_gb = torch.cuda.get_device_properties(idx).total_memory / 1024**3
            print(f" GPU {idx}: {torch.cuda.get_device_name(idx)}")
            print(f" Memory: {total_gb:.1f} GB")
    except Exception as e:
        print(f"✗ Error checking PyTorch: {e}")
|
def check_paddle_gpu():
    """Report whether PaddlePaddle is installed and CUDA-enabled."""
    print("\n=== PADDLEPADDLE GPU CHECK ===")
    try:
        # Imported lazily: Paddle is optional and may be absent entirely.
        import paddle

        print(f"PaddlePaddle version: {paddle.__version__}")
        cuda_build = paddle.is_compiled_with_cuda()
        print(f"Paddle GPU available: {cuda_build}")

        if not cuda_build:
            print("✗ PaddlePaddle not compiled with CUDA")
            return

        print(f"Paddle CUDA version: {paddle.version.cuda()}")
        try:
            paddle.device.set_device('gpu')
        except Exception as e:
            print(f"✗ Error setting PaddlePaddle GPU device: {e}")
        else:
            print("✓ PaddlePaddle GPU device set successfully")
    except ImportError:
        print("✗ PaddlePaddle not installed")
    except Exception as e:
        print(f"✗ Error checking PaddlePaddle: {e}")
|
def check_environment_variables():
    """Print CUDA-related environment variables.

    PATH is filtered down to entries mentioning 'cuda' or 'nvidia'
    so the output stays readable.
    """
    print("\n=== ENVIRONMENT VARIABLES ===")
    env_vars = [
        'CUDA_PATH', 'CUDA_PATH_V12_9', 'PATH',
        'CUDA_VISIBLE_DEVICES', 'CUDA_CACHE_PATH'
    ]

    for var in env_vars:
        value = os.environ.get(var)
        if not value:
            print(f"{var}: Not set")
            continue
        if var == 'PATH':
            # BUGFIX: split on os.pathsep (';' on Windows, ':' elsewhere)
            # instead of a hard-coded ';' so the filter works on any OS.
            cuda_paths = [p for p in value.split(os.pathsep)
                          if 'cuda' in p.lower() or 'nvidia' in p.lower()]
            print(f"{var}:")
            for path in cuda_paths:
                print(f" {path}")
        else:
            print(f"{var}: {value}")
|
def test_cudnn_directly():
    """Try to load a cuDNN DLL via ctypes and query its version.

    Returns:
        True only when a candidate DLL both loads and reports a version;
        False in every other case (no CUDA_PATH, no DLL on disk, load
        failure, or version query failure).
    """
    print("\n=== DIRECT cuDNN TEST ===")
    try:
        import ctypes
        cuda_path = os.environ.get('CUDA_PATH')

        if cuda_path:
            cudnn_paths = [
                os.path.join(cuda_path, 'bin', 'cudnn64_8.dll'),
                os.path.join(cuda_path, 'bin', 'cudnn64.dll'),
                r'C:\Program Files\NVIDIA\cudnn-windows-x86_64-8.9.7.29_cuda12-archive\bin\cudnn64_8.dll'
            ]

            tested_any = False
            for cudnn_path in cudnn_paths:
                if not os.path.exists(cudnn_path):
                    continue
                tested_any = True
                print(f"Testing cuDNN: {cudnn_path}")
                try:
                    cudnn = ctypes.WinDLL(cudnn_path)
                    print(f"✓ Successfully loaded {os.path.basename(cudnn_path)}")

                    # cudnnGetVersion returns size_t; set restype so ctypes
                    # doesn't truncate it to a C int.
                    try:
                        cudnnGetVersion = cudnn.cudnnGetVersion
                        cudnnGetVersion.restype = ctypes.c_size_t
                        version = cudnnGetVersion()
                        print(f"✓ cuDNN version: {version}")
                        return True
                    except Exception as e:
                        # DLL loads but is broken — report and stop probing.
                        print(f"✗ Cannot get cuDNN version: {e}")
                        return False

                except Exception as e:
                    print(f"✗ Failed to load {cudnn_path}: {e}")
                    continue

            # BUGFIX: distinguish "nothing on disk" from "found but unloadable";
            # the original printed "No cuDNN DLL found" even after testing some.
            if tested_any:
                print("✗ cuDNN DLL(s) found but none could be loaded")
            else:
                print("✗ No cuDNN DLL found to test")
        else:
            print("✗ CUDA_PATH not set")

    except Exception as e:
        print(f"✗ Error in direct cuDNN test: {e}")

    return False
|
|
def check_system_info():
    """Print basic OS, architecture, and processor details."""
    print("\n=== SYSTEM INFORMATION ===")
    os_name, os_release = platform.system(), platform.release()
    bitness = platform.architecture()[0]
    print(f"OS: {os_name} {os_release}")
    print(f"Architecture: {bitness}")
    print(f"Processor: {platform.processor()}")
|
if __name__ == "__main__":
    banner = "=" * 50
    print("GPU MODE FAILURE DIAGNOSTIC")
    print(banner)

    # Run the checks in order of increasing specificity:
    # system → environment → toolkit → frameworks.
    for step in (check_system_info,
                 check_environment_variables,
                 check_cuda_installation,
                 check_cudnn_installation,
                 check_pytorch_gpu,
                 check_paddle_gpu):
        step()

    cudnn_working = test_cudnn_directly()

    print("\n" + banner)
    print("DIAGNOSTIC SUMMARY:")

    if cudnn_working:
        print("✓ cuDNN appears to be working correctly")
        print(" The issue might be in PaddleOCR's cuDNN detection")
    else:
        print("✗ cuDNN is NOT working correctly")
        print(" This is the root cause of the GPU mode failure")
        print("\nRECOMMENDED ACTIONS:")
        print("1. Reinstall cuDNN and ensure the DLLs are in CUDA_PATH/bin")
        print("2. Add cuDNN bin directory to system PATH")
        print("3. Restart the system after cuDNN installation")
        print("4. Verify cuDNN version compatibility with CUDA 12.9")