# Source: railseek6/gpu_workaround_server.py (333 lines, 12 KiB, Python)
"""
GPU Workaround Server - Uses alternative approach for GPU acceleration
Bypasses cuDNN requirement while maintaining dependency isolation
"""
# Standard-library imports: environment/filesystem access, interpreter info,
# child-process control, and logging.
import os
import sys
import subprocess
import logging
from pathlib import Path  # NOTE(review): Path appears unused in this file — confirm before removing.
# Configure logging
# Timestamped, level-tagged format shared by the whole module.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class GPUWorkaroundServer:
    """Server that provides GPU acceleration without a cuDNN dependency.

    OpenCLIP image classification runs inside an isolated virtual
    environment (GPU-enabled), while PaddleOCR is allowed to fall back
    to CPU when cuDNN is missing. A persistent child process keeps the
    OpenCLIP model warm between requests.
    """

    def __init__(self):
        # Port the LightRAG/uvicorn front end listens on.
        self.port = 3015
        # Interpreter inside the isolated OpenCLIP venv (Windows layout —
        # TODO confirm path when porting to other platforms).
        self.venv_python = "openclip_gpu_env\\Scripts\\python.exe"
        self.setup_environment()

    def setup_environment(self):
        """Setup environment variables for GPU acceleration.

        Mutates ``os.environ`` so every child process spawned later
        inherits the CUDA configuration.
        """
        logger.info("=== SETTING UP GPU WORKAROUND ENVIRONMENT ===")
        # Pin to the first GPU so all subprocesses agree on the device.
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        # Probe known install locations, newest toolkit first.
        cuda_paths = [
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
        ]
        for cuda_path in cuda_paths:
            if os.path.exists(cuda_path):
                os.environ['CUDA_PATH'] = cuda_path
                os.environ['CUDA_HOME'] = cuda_path
                logger.info(f"✓ Found CUDA at: {cuda_path}")
                break
        else:
            # for/else: no candidate path existed.
            logger.warning("⚠ No CUDA installation found, will use CPU fallback")
        # Prepend the CUDA bin directory to PATH so its DLLs resolve first.
        if 'CUDA_PATH' in os.environ:
            cuda_bin = os.path.join(os.environ['CUDA_PATH'], 'bin')
            current_path = os.environ.get('PATH', '')
            if cuda_bin not in current_path:
                # FIX: use os.pathsep instead of a hard-coded ';' — identical
                # on Windows, correct on any other platform.
                os.environ['PATH'] = cuda_bin + os.pathsep + current_path
                logger.info(f"✓ Added to PATH: {cuda_bin}")

    def test_gpu_availability(self):
        """Test GPU availability without a cuDNN dependency.

        Returns:
            bool: True if at least one of OpenCLIP or PaddleOCR can use
            the GPU.
        """
        logger.info("=== TESTING GPU AVAILABILITY ===")
        # Test OpenCLIP GPU in its virtual environment.
        openclip_gpu = self.test_openclip_gpu()
        # Test PaddleOCR GPU (falls back to CPU if cuDNN is missing).
        paddle_gpu = self.test_paddle_gpu()
        logger.info(f"✓ OpenCLIP GPU: {'AVAILABLE' if openclip_gpu else 'UNAVAILABLE'}")
        logger.info(f"✓ PaddleOCR GPU: {'AVAILABLE' if paddle_gpu else 'UNAVAILABLE (using CPU)'}")
        return openclip_gpu or paddle_gpu

    def test_openclip_gpu(self):
        """Test OpenCLIP GPU availability inside the isolated venv.

        Runs a tiny torch probe in the venv interpreter and parses its
        stdout. Returns False on any failure (missing venv, timeout,
        non-zero exit, or CUDA unavailable).
        """
        try:
            if not os.path.exists(self.venv_python):
                logger.error("❌ OpenCLIP virtual environment not found")
                return False
            # Probe script content starts at column 0 so it is valid when
            # passed to `python -c`.
            test_script = """
import torch
print(f"OPENCLIP_GPU_AVAILABLE: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"OPENCLIP_GPU_DEVICE_COUNT: {torch.cuda.device_count()}")
    print(f"OPENCLIP_GPU_DEVICE_NAME: {torch.cuda.get_device_name(0)}")
"""
            result = subprocess.run([
                self.venv_python, "-c", test_script
            ], capture_output=True, text=True, timeout=30)
            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "OPENCLIP_GPU_AVAILABLE:" in line and "True" in line:
                        return True
            return False
        except Exception as e:
            logger.error(f"OpenCLIP GPU test failed: {e}")
            return False

    def test_paddle_gpu(self):
        """Test PaddlePaddle GPU availability (falls back gracefully).

        Runs a probe in the CURRENT interpreter (paddle lives in the main
        environment, unlike OpenCLIP). Only a successful
        ``set_device('gpu')`` counts as GPU-available.
        """
        try:
            test_script = """
try:
    import paddle
    print(f"PADDLE_GPU_AVAILABLE: {paddle.is_compiled_with_cuda()}")
    if paddle.is_compiled_with_cuda():
        try:
            paddle.device.set_device('gpu')
            print("PADDLE_GPU_STATUS: SUCCESS")
        except Exception as e:
            print(f"PADDLE_GPU_STATUS: FALLBACK - {e}")
    else:
        print("PADDLE_GPU_STATUS: CPU_ONLY")
except Exception as e:
    print(f"PADDLE_ERROR: {e}")
"""
            result = subprocess.run([
                sys.executable, "-c", test_script
            ], capture_output=True, text=True, timeout=30)
            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "PADDLE_GPU_STATUS: SUCCESS" in line:
                        return True
            return False
        except Exception as e:
            logger.error(f"Paddle GPU test failed: {e}")
            return False

    def create_optimized_classifier(self):
        """Create an optimized image classifier with a persistent process.

        Writes a standalone script that loads OpenCLIP once and then
        serves line-delimited JSON requests over stdin/stdout.

        Returns:
            str: path of the written classifier script.
        """
        logger.info("=== CREATING OPTIMIZED IMAGE CLASSIFIER ===")
        # Script content starts at column 0: it is written verbatim to disk
        # and must be valid top-level Python.
        persistent_script = """
import sys
import json
import torch
import open_clip
from PIL import Image
import time
import logging

# Standalone logger for the child process.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class PersistentClassifier:
    def __init__(self):
        self.model = None
        self.processor = None
        self.text_features = None
        self.text_labels = None
        self.load_model()

    def load_model(self):
        \"\"\"Load model once and keep in memory\"\"\"
        logger.info("Loading OpenCLIP model...")
        start_time = time.time()
        # NOTE: ViT-B-16 uses 16px patches (4x the tokens of ViT-B-32), so it
        # is heavier but more accurate than ViT-B-32 — not smaller/faster.
        self.model, _, self.processor = open_clip.create_model_and_transforms(
            model_name="ViT-B-16",
            pretrained="laion2b_s34b_b88k"
        )
        # Optimized label set for document processing
        self.text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a document",
            "a photo of a chart", "a photo of a diagram", "a photo of a table",
            "a photo of a graph", "a photo of a screenshot", "a photo of a logo",
            "a photo of text", "a photo of a signature", "a photo of a barcode",
            "a photo of a qr code", "a photo of a person", "a photo of a building"
        ]
        # Move to GPU and enable optimizations
        if torch.cuda.is_available():
            self.model = self.model.half().cuda()  # FP16 for speed
            logger.info(f"Model loaded on GPU (FP16) in {time.time()-start_time:.2f}s")
        else:
            logger.warning("Using CPU - slower performance")
        # Precompute text features once
        with torch.no_grad():
            text_tokens = open_clip.tokenize(self.text_labels)
            if torch.cuda.is_available():
                text_tokens = text_tokens.cuda()
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)
        logger.info("Model and text features loaded successfully")

    def classify_batch(self, image_paths, top_k=3):
        \"\"\"Classify multiple images efficiently\"\"\"
        results = []
        for image_path in image_paths:
            try:
                # Load and process image
                image = Image.open(image_path).convert("RGB")
                image_tensor = self.processor(image).unsqueeze(0)
                # Move to GPU if available
                if torch.cuda.is_available():
                    image_tensor = image_tensor.half().cuda()
                # Encode image and compute similarity
                with torch.no_grad():
                    image_features = self.model.encode_image(image_tensor)
                    image_features /= image_features.norm(dim=-1, keepdim=True)
                    similarity = (100.0 * image_features @ self.text_features.T).softmax(dim=-1)
                values, indices = similarity[0].topk(top_k)
                image_results = []
                for value, index in zip(values, indices):
                    image_results.append({
                        "label": self.text_labels[index],
                        "confidence": float(value)
                    })
                results.append(image_results)
            except Exception as e:
                logger.error(f"Error processing {image_path}: {e}")
                results.append([{"label": "processing_error", "confidence": 0.0}])
        return results


# Create persistent classifier instance
classifier = PersistentClassifier()

# Main loop: line-delimited JSON requests on stdin, JSON responses on stdout.
while True:
    try:
        raw = sys.stdin.readline()
        if raw == "":
            # FIX: readline() returns '' only at EOF. The previous version
            # stripped first and 'continue'd, busy-looping forever once the
            # parent process closed our stdin.
            break
        line = raw.strip()
        if not line:
            continue
        request = json.loads(line)
        if request.get('action') == 'classify':
            image_paths = request['image_paths']
            top_k = request.get('top_k', 3)
            start_time = time.time()
            results = classifier.classify_batch(image_paths, top_k)
            processing_time = time.time() - start_time
            response = {
                'success': True,
                'results': results,
                'processing_time': processing_time,
                'images_processed': len(image_paths)
            }
            print(json.dumps(response))
            sys.stdout.flush()
        elif request.get('action') == 'ping':
            print(json.dumps({'success': True, 'message': 'alive'}))
            sys.stdout.flush()
        elif request.get('action') == 'exit':
            break
    except Exception as e:
        error_response = {
            'success': False,
            'error': str(e)
        }
        print(json.dumps(error_response))
        sys.stdout.flush()
"""
        # Write persistent classifier script.
        # FIX: explicit encoding so the write never depends on the locale
        # default (the script contains only ASCII today, but be safe).
        script_path = "persistent_classifier.py"
        with open(script_path, 'w', encoding="utf-8") as f:
            f.write(persistent_script)
        logger.info(f"✓ Created persistent classifier: {script_path}")
        return script_path

    def start_server(self):
        """Start the optimized GPU server.

        Launches the persistent classifier child process, then runs the
        LightRAG uvicorn server in the foreground. The classifier is
        terminated on shutdown regardless of how the server exits.
        """
        logger.info("=== STARTING OPTIMIZED GPU SERVER ===")
        # Test GPU availability (informational; we continue either way).
        if not self.test_gpu_availability():
            logger.warning("⚠ Limited GPU availability, some components may use CPU")
        # Create optimized classifier script on disk.
        classifier_script = self.create_optimized_classifier()
        logger.info("Starting persistent image classifier...")
        # FIX: initialize to None so the finally block is safe even when
        # Popen itself raises (previously the cleanup assumed it existed).
        classifier_process = None
        try:
            classifier_process = subprocess.Popen([
                self.venv_python, classifier_script
            ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            # Start LightRAG server (blocks until it exits).
            logger.info(f"Starting LightRAG server on port {self.port}...")
            subprocess.run([
                sys.executable, '-m', 'uvicorn', 'main:app',
                '--host', '0.0.0.0', '--port', str(self.port), '--reload'
            ], check=True)
        except KeyboardInterrupt:
            logger.info("Server stopped by user")
        except Exception as e:
            logger.error(f"Server error: {e}")
        finally:
            # Cleanup: stop the classifier child if it is still running.
            if classifier_process is not None and classifier_process.poll() is None:
                classifier_process.terminate()
                classifier_process.wait()
def main():
    """Entry point: print a capability banner, then launch the server."""
    banner = (
        "🚀 OPTIMIZED GPU WORKAROUND SERVER",
        "=" * 50,
        "This server provides:",
        " ✅ GPU acceleration for OpenCLIP image classification",
        " ✅ Persistent classifier process for faster inference",
        " ✅ Dependency isolation between PaddleOCR and OpenCLIP",
        " ✅ Graceful fallback for missing cuDNN",
        "",
    )
    for banner_line in banner:
        print(banner_line)
    # Construct the server (sets up CUDA env vars) and run it.
    GPUWorkaroundServer().start_server()
# Script entry point: run the server only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()