"""
GPU Workaround Server - Uses alternative approach for GPU acceleration

Bypasses cuDNN requirement while maintaining dependency isolation
"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Configure logging
# Module-level logger used throughout GPUWorkaroundServer below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
|
|
|
|
class GPUWorkaroundServer:
    """Server that provides GPU acceleration without cuDNN dependency.

    Strategy: OpenCLIP runs in its own virtual environment
    (``openclip_gpu_env``) so its CUDA-enabled torch build is isolated
    from PaddleOCR's dependencies; PaddleOCR falls back to CPU on its
    own when cuDNN is missing.  Windows paths are assumed throughout
    (backslash venv path, ``C:\\Program Files`` CUDA locations).
    """

    def __init__(self):
        # Port the LightRAG/uvicorn HTTP server listens on.
        self.port = 3015
        # Interpreter of the isolated OpenCLIP venv (Windows layout).
        self.venv_python = "openclip_gpu_env\\Scripts\\python.exe"
        self.setup_environment()

    def setup_environment(self):
        """Set CUDA-related environment variables for this process.

        Picks the first (newest) CUDA toolkit found among the known
        install locations and prepends its ``bin`` directory to PATH so
        child processes (the venv python, uvicorn) inherit it.
        """
        logger.info("=== SETTING UP GPU WORKAROUND ENVIRONMENT ===")

        # Pin all children to the first GPU.
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Candidate CUDA toolkit installs, newest first.
        cuda_paths = [
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
        ]

        for cuda_path in cuda_paths:
            if os.path.exists(cuda_path):
                os.environ['CUDA_PATH'] = cuda_path
                os.environ['CUDA_HOME'] = cuda_path
                logger.info(f"✓ Found CUDA at: {cuda_path}")
                break
        else:  # for/else: no break means no CUDA install was found
            logger.warning("⚠ No CUDA installation found, will use CPU fallback")

        # Prepend CUDA's bin dir to PATH so its DLLs resolve in children.
        if 'CUDA_PATH' in os.environ:
            cuda_bin = os.path.join(os.environ['CUDA_PATH'], 'bin')
            current_path = os.environ.get('PATH', '')
            if cuda_bin not in current_path:
                # os.pathsep is ';' on Windows, the only supported platform here.
                os.environ['PATH'] = cuda_bin + os.pathsep + current_path
                logger.info(f"✓ Added to PATH: {cuda_bin}")

    def test_gpu_availability(self):
        """Probe both GPU backends; return True if either can use the GPU."""
        logger.info("=== TESTING GPU AVAILABILITY ===")

        # Test OpenCLIP GPU in the isolated virtual environment.
        openclip_gpu = self.test_openclip_gpu()

        # Test PaddleOCR GPU (degrades to CPU by itself if cuDNN is missing).
        paddle_gpu = self.test_paddle_gpu()

        logger.info(f"✓ OpenCLIP GPU: {'AVAILABLE' if openclip_gpu else 'UNAVAILABLE'}")
        logger.info(f"✓ PaddleOCR GPU: {'AVAILABLE' if paddle_gpu else 'UNAVAILABLE (using CPU)'}")

        return openclip_gpu or paddle_gpu

    def test_openclip_gpu(self):
        """Check torch.cuda availability inside the OpenCLIP venv.

        Runs a short probe in a subprocess so the venv's torch build
        (not this interpreter's) is the one being tested.  Returns
        False on any failure, including a missing venv or timeout.
        """
        try:
            if not os.path.exists(self.venv_python):
                logger.error("❌ OpenCLIP virtual environment not found")
                return False

            test_script = """
import torch
print(f"OPENCLIP_GPU_AVAILABLE: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"OPENCLIP_GPU_DEVICE_COUNT: {torch.cuda.device_count()}")
    print(f"OPENCLIP_GPU_DEVICE_NAME: {torch.cuda.get_device_name(0)}")
"""
            result = subprocess.run([
                self.venv_python, "-c", test_script
            ], capture_output=True, text=True, timeout=30)

            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "OPENCLIP_GPU_AVAILABLE:" in line and "True" in line:
                        return True
            return False

        except Exception as e:
            # Covers TimeoutExpired and OSError (e.g. broken venv launcher).
            logger.error(f"OpenCLIP GPU test failed: {e}")
            return False

    def test_paddle_gpu(self):
        """Check whether PaddlePaddle can actually select the GPU device.

        Success requires a CUDA build of paddle AND working cuDNN;
        otherwise the probe reports FALLBACK/CPU_ONLY and this method
        returns False.  Uses the current interpreter, not the venv.
        """
        try:
            test_script = """
try:
    import paddle
    print(f"PADDLE_GPU_AVAILABLE: {paddle.is_compiled_with_cuda()}")
    if paddle.is_compiled_with_cuda():
        try:
            paddle.device.set_device('gpu')
            print("PADDLE_GPU_STATUS: SUCCESS")
        except Exception as e:
            print(f"PADDLE_GPU_STATUS: FALLBACK - {e}")
    else:
        print("PADDLE_GPU_STATUS: CPU_ONLY")
except Exception as e:
    print(f"PADDLE_ERROR: {e}")
"""
            result = subprocess.run([
                sys.executable, "-c", test_script
            ], capture_output=True, text=True, timeout=30)

            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "PADDLE_GPU_STATUS: SUCCESS" in line:
                        return True
            return False

        except Exception as e:
            logger.error(f"Paddle GPU test failed: {e}")
            return False

    def create_optimized_classifier(self):
        """Write the persistent OpenCLIP classifier script to disk.

        The generated script loads the model once, then serves
        JSON-line requests over stdin/stdout (actions: ``classify``,
        ``ping``, ``exit``).  Returns the path of the written script.
        """
        logger.info("=== CREATING OPTIMIZED IMAGE CLASSIFIER ===")

        # Create persistent classifier script (executed by the venv python).
        persistent_script = """
import sys
import json
import torch
import open_clip
from PIL import Image
import time
import logging

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class PersistentClassifier:
    def __init__(self):
        self.model = None
        self.processor = None
        self.text_features = None
        self.text_labels = None
        self.load_model()

    def load_model(self):
        \"\"\"Load model once and keep in memory\"\"\"
        logger.info("Loading OpenCLIP model...")
        start_time = time.time()

        # Use smaller model for faster inference
        self.model, _, self.processor = open_clip.create_model_and_transforms(
            model_name="ViT-B-16", # Smaller than ViT-B-32 for speed
            pretrained="laion2b_s34b_b88k"
        )

        # Optimized label set for document processing
        self.text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a document",
            "a photo of a chart", "a photo of a diagram", "a photo of a table",
            "a photo of a graph", "a photo of a screenshot", "a photo of a logo",
            "a photo of text", "a photo of a signature", "a photo of a barcode",
            "a photo of a qr code", "a photo of a person", "a photo of a building"
        ]

        # Move to GPU and enable optimizations
        if torch.cuda.is_available():
            self.model = self.model.half().cuda() # FP16 for speed
            logger.info(f"Model loaded on GPU (FP16) in {time.time()-start_time:.2f}s")
        else:
            logger.warning("Using CPU - slower performance")

        # Precompute text features once
        with torch.no_grad():
            text_tokens = open_clip.tokenize(self.text_labels)
            if torch.cuda.is_available():
                text_tokens = text_tokens.cuda()
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)

        logger.info("Model and text features loaded successfully")

    def classify_batch(self, image_paths, top_k=3):
        \"\"\"Classify multiple images efficiently\"\"\"
        results = []

        for image_path in image_paths:
            try:
                # Load and process image
                image = Image.open(image_path).convert("RGB")
                image_tensor = self.processor(image).unsqueeze(0)

                # Move to GPU if available
                if torch.cuda.is_available():
                    image_tensor = image_tensor.half().cuda()

                # Encode image and compute similarity
                with torch.no_grad():
                    image_features = self.model.encode_image(image_tensor)
                    image_features /= image_features.norm(dim=-1, keepdim=True)

                    similarity = (100.0 * image_features @ self.text_features.T).softmax(dim=-1)
                    values, indices = similarity[0].topk(top_k)

                    image_results = []
                    for value, index in zip(values, indices):
                        image_results.append({
                            "label": self.text_labels[index],
                            "confidence": float(value)
                        })

                results.append(image_results)

            except Exception as e:
                logger.error(f"Error processing {image_path}: {e}")
                results.append([{"label": "processing_error", "confidence": 0.0}])

        return results

# Create persistent classifier instance
classifier = PersistentClassifier()

# Main loop for processing requests
while True:
    try:
        # Read input from stdin
        line = sys.stdin.readline().strip()
        if not line:
            continue

        request = json.loads(line)

        if request.get('action') == 'classify':
            image_paths = request['image_paths']
            top_k = request.get('top_k', 3)

            start_time = time.time()
            results = classifier.classify_batch(image_paths, top_k)
            processing_time = time.time() - start_time

            response = {
                'success': True,
                'results': results,
                'processing_time': processing_time,
                'images_processed': len(image_paths)
            }

            print(json.dumps(response))
            sys.stdout.flush()

        elif request.get('action') == 'ping':
            print(json.dumps({'success': True, 'message': 'alive'}))
            sys.stdout.flush()

        elif request.get('action') == 'exit':
            break

    except Exception as e:
        error_response = {
            'success': False,
            'error': str(e)
        }
        print(json.dumps(error_response))
        sys.stdout.flush()
"""

        # Write persistent classifier script.  Explicit encoding avoids
        # locale-dependent codecs on Windows (default is cp1252 there).
        script_path = "persistent_classifier.py"
        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(persistent_script)

        logger.info(f"✓ Created persistent classifier: {script_path}")
        return script_path

    def start_server(self):
        """Start the persistent classifier and the LightRAG uvicorn server.

        Blocks until uvicorn exits (or Ctrl-C); always shuts the
        classifier child process down on the way out.
        """
        logger.info("=== STARTING OPTIMIZED GPU SERVER ===")

        # Probe GPUs first so the log explains any later CPU fallback.
        if not self.test_gpu_availability():
            logger.warning("⚠ Limited GPU availability, some components may use CPU")

        # Create optimized classifier script on disk.
        classifier_script = self.create_optimized_classifier()

        # Start persistent classifier process.  stdin/stdout are piped for
        # the JSON-line request protocol.  stderr is deliberately NOT piped:
        # the child logs to stderr and nothing here drains a stderr pipe, so
        # piping it would eventually fill the OS buffer and block the child.
        logger.info("Starting persistent image classifier...")
        classifier_process = subprocess.Popen([
            self.venv_python, classifier_script
        ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)

        # Start LightRAG server (blocking call).
        logger.info(f"Starting LightRAG server on port {self.port}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'uvicorn', 'main:app',
                '--host', '0.0.0.0', '--port', str(self.port), '--reload'
            ], check=True)
        except KeyboardInterrupt:
            logger.info("Server stopped by user")
        except Exception as e:
            logger.error(f"Server error: {e}")
        finally:
            # Cleanup: ask the classifier to stop; escalate to kill if it
            # does not exit promptly (bounded wait instead of hanging forever).
            if classifier_process.poll() is None:
                classifier_process.terminate()
                try:
                    classifier_process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    classifier_process.kill()
                    classifier_process.wait()
|
|
|
|
def main():
    """Entry point: print the feature banner, then run the workaround server."""
    banner = [
        "🚀 OPTIMIZED GPU WORKAROUND SERVER",
        "=" * 50,
        "This server provides:",
        " ✅ GPU acceleration for OpenCLIP image classification",
        " ✅ Persistent classifier process for faster inference",
        " ✅ Dependency isolation between PaddleOCR and OpenCLIP",
        " ✅ Graceful fallback for missing cuDNN",
        "",
    ]
    for text in banner:
        print(text)

    # Construct the server (sets up CUDA env vars) and block in its loop.
    GPUWorkaroundServer().start_server()
|
|
|
|
# Run the server only when executed as a script (not on import).
if __name__ == "__main__":
    main()