"""
GPU Workaround Server - Uses alternative approach for GPU acceleration
Bypasses cuDNN requirement while maintaining dependency isolation
"""

import os
import sys
import subprocess
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Probe executed with the OpenCLIP virtual-environment interpreter; the parent
# only looks for the "OPENCLIP_GPU_AVAILABLE:" line containing "True".
OPENCLIP_GPU_TEST_SCRIPT = """
import torch
print(f"OPENCLIP_GPU_AVAILABLE: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"OPENCLIP_GPU_DEVICE_COUNT: {torch.cuda.device_count()}")
    print(f"OPENCLIP_GPU_DEVICE_NAME: {torch.cuda.get_device_name(0)}")
"""

# Probe executed with the current interpreter; the parent only looks for the
# "PADDLE_GPU_STATUS: SUCCESS" line.
PADDLE_GPU_TEST_SCRIPT = """
try:
    import paddle
    print(f"PADDLE_GPU_AVAILABLE: {paddle.is_compiled_with_cuda()}")
    if paddle.is_compiled_with_cuda():
        try:
            paddle.device.set_device('gpu')
            print("PADDLE_GPU_STATUS: SUCCESS")
        except Exception as e:
            print(f"PADDLE_GPU_STATUS: FALLBACK - {e}")
    else:
        print("PADDLE_GPU_STATUS: CPU_ONLY")
except Exception as e:
    print(f"PADDLE_ERROR: {e}")
"""

# Source of the long-running classifier written to disk by
# GPUWorkaroundServer.create_optimized_classifier(). It speaks a
# line-delimited JSON protocol: requests on stdin, responses on stdout,
# log output on stderr (logging.basicConfig's default stream).
PERSISTENT_CLASSIFIER_SCRIPT = '''
import sys
import json
import torch
import open_clip
from PIL import Image
import time
import logging

# Setup logging (goes to stderr so stdout stays reserved for JSON responses)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class PersistentClassifier:
    def __init__(self):
        self.model = None
        self.processor = None
        self.text_features = None
        self.text_labels = None
        self.load_model()

    def load_model(self):
        """Load model once and keep in memory"""
        logger.info("Loading OpenCLIP model...")
        start_time = time.time()

        # ViT-B-16 checkpoint trained on LAION-2B
        self.model, _, self.processor = open_clip.create_model_and_transforms(
            model_name="ViT-B-16",
            pretrained="laion2b_s34b_b88k"
        )
        # Inference only: switch off training-mode behaviour
        self.model.eval()

        # Optimized label set for document processing
        self.text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a document",
            "a photo of a chart", "a photo of a diagram", "a photo of a table",
            "a photo of a graph", "a photo of a screenshot", "a photo of a logo",
            "a photo of text", "a photo of a signature", "a photo of a barcode",
            "a photo of a qr code", "a photo of a person", "a photo of a building"
        ]

        # Move to GPU and enable optimizations
        if torch.cuda.is_available():
            self.model = self.model.half().cuda()  # FP16 for speed
            logger.info(f"Model loaded on GPU (FP16) in {time.time()-start_time:.2f}s")
        else:
            logger.warning("Using CPU - slower performance")

        # Precompute text features once
        with torch.no_grad():
            text_tokens = open_clip.tokenize(self.text_labels)
            if torch.cuda.is_available():
                text_tokens = text_tokens.cuda()
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)

        logger.info("Model and text features loaded successfully")

    def classify_batch(self, image_paths, top_k=3):
        """Classify multiple images efficiently"""
        results = []
        # topk() raises when asked for more entries than there are labels
        top_k = min(top_k, len(self.text_labels))

        for image_path in image_paths:
            try:
                # Load and process image
                image = Image.open(image_path).convert("RGB")
                image_tensor = self.processor(image).unsqueeze(0)

                # Move to GPU if available
                if torch.cuda.is_available():
                    image_tensor = image_tensor.half().cuda()

                # Encode image and compute similarity
                with torch.no_grad():
                    image_features = self.model.encode_image(image_tensor)
                    image_features /= image_features.norm(dim=-1, keepdim=True)
                    similarity = (100.0 * image_features @ self.text_features.T).softmax(dim=-1)
                    values, indices = similarity[0].topk(top_k)

                image_results = []
                for value, index in zip(values, indices):
                    image_results.append({
                        "label": self.text_labels[int(index)],
                        "confidence": float(value)
                    })
                results.append(image_results)

            except Exception as e:
                logger.error(f"Error processing {image_path}: {e}")
                results.append([{"label": "processing_error", "confidence": 0.0}])

        return results

# Create persistent classifier instance
classifier = PersistentClassifier()

# Main loop for processing requests
while True:
    try:
        # Read input from stdin
        raw_line = sys.stdin.readline()
        if not raw_line:
            # EOF: the parent closed the pipe, so stop instead of spinning
            break

        line = raw_line.strip()
        if not line:
            continue

        request = json.loads(line)

        if request.get('action') == 'classify':
            image_paths = request['image_paths']
            top_k = request.get('top_k', 3)

            start_time = time.time()
            results = classifier.classify_batch(image_paths, top_k)
            processing_time = time.time() - start_time

            response = {
                'success': True,
                'results': results,
                'processing_time': processing_time,
                'images_processed': len(image_paths)
            }
            print(json.dumps(response))
            sys.stdout.flush()

        elif request.get('action') == 'ping':
            print(json.dumps({'success': True, 'message': 'alive'}))
            sys.stdout.flush()

        elif request.get('action') == 'exit':
            break

    except Exception as e:
        error_response = {
            'success': False,
            'error': str(e)
        }
        print(json.dumps(error_response))
        sys.stdout.flush()
'''


class GPUWorkaroundServer:
    """Server that provides GPU acceleration without cuDNN dependency"""

    def __init__(self):
        """Set the port and venv interpreter path, then configure the environment."""
        self.port = 3015
        self.venv_python = "openclip_gpu_env\\Scripts\\python.exe"
        self.setup_environment()

    def setup_environment(self):
        """Setup environment variables for GPU acceleration.

        Sets CUDA_VISIBLE_DEVICES to '0', points CUDA_PATH / CUDA_HOME at the
        first known CUDA toolkit directory that exists, and prepends that
        toolkit's ``bin`` directory to PATH when it is not already listed.
        """
        logger.info("=== SETTING UP GPU WORKAROUND ENVIRONMENT ===")

        # Set basic CUDA environment
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Try to find CUDA installation (first existing candidate wins)
        cuda_paths = [
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
        ]

        for cuda_path in cuda_paths:
            if os.path.exists(cuda_path):
                os.environ['CUDA_PATH'] = cuda_path
                os.environ['CUDA_HOME'] = cuda_path
                logger.info(f"✓ Found CUDA at: {cuda_path}")
                break
        else:
            logger.warning("⚠ No CUDA installation found, will use CPU fallback")

        # Add CUDA to PATH if found (CUDA_PATH may also come from the caller's
        # environment rather than from the search above)
        if 'CUDA_PATH' in os.environ:
            cuda_bin = os.path.join(os.environ['CUDA_PATH'], 'bin')
            if self._prepend_to_path(cuda_bin):
                logger.info(f"✓ Added to PATH: {cuda_bin}")

    @staticmethod
    def _prepend_to_path(directory):
        """Put *directory* at the front of PATH unless it is already an entry.

        Entries are compared whole (split on ``os.pathsep``) so a directory
        that is merely a prefix of an existing entry is still added.

        Returns:
            True when PATH was modified, False when the entry already existed.
        """
        current_path = os.environ.get('PATH', '')
        entries = [entry for entry in current_path.split(os.pathsep) if entry]
        if directory in entries:
            return False
        os.environ['PATH'] = os.pathsep.join([directory] + entries)
        return True

    @staticmethod
    def _run_probe(python_executable, script):
        """Run *script* with ``python_executable -c`` and return its stdout.

        Returns an empty string when the probe exits with a non-zero status.
        Exceptions from ``subprocess.run`` (missing executable, 30 s timeout)
        propagate to the caller.
        """
        result = subprocess.run(
            [python_executable, "-c", script],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            logger.debug(f"Probe exited with {result.returncode}: {result.stderr.strip()}")
            return ""
        return result.stdout

    @staticmethod
    def _stdout_has_line(stdout, *fragments):
        """Return True when a single line of *stdout* contains every fragment."""
        return any(
            all(fragment in line for fragment in fragments)
            for line in stdout.splitlines()
        )

    def test_gpu_availability(self):
        """Test GPU availability without cuDNN dependency.

        Returns:
            True when either the OpenCLIP or the PaddleOCR probe reports a
            usable GPU, otherwise False.
        """
        logger.info("=== TESTING GPU AVAILABILITY ===")

        # Test OpenCLIP GPU in virtual environment
        openclip_gpu = self.test_openclip_gpu()

        # Test PaddleOCR GPU (will fallback to CPU if cuDNN missing)
        paddle_gpu = self.test_paddle_gpu()

        logger.info(f"✓ OpenCLIP GPU: {'AVAILABLE' if openclip_gpu else 'UNAVAILABLE'}")
        logger.info(f"✓ PaddleOCR GPU: {'AVAILABLE' if paddle_gpu else 'UNAVAILABLE (using CPU)'}")

        return openclip_gpu or paddle_gpu

    def test_openclip_gpu(self):
        """Test OpenCLIP GPU availability in virtual environment.

        Returns:
            True when the venv interpreter reports ``torch.cuda.is_available()``
            as True; False when the venv is missing, the probe fails, or any
            exception occurs (the exception is logged, never raised).
        """
        try:
            if not os.path.exists(self.venv_python):
                logger.error("❌ OpenCLIP virtual environment not found")
                return False

            stdout = self._run_probe(self.venv_python, OPENCLIP_GPU_TEST_SCRIPT)
            return self._stdout_has_line(stdout, "OPENCLIP_GPU_AVAILABLE:", "True")

        except Exception as e:
            logger.error(f"OpenCLIP GPU test failed: {e}")
            return False

    def test_paddle_gpu(self):
        """Test PaddlePaddle GPU availability (will fallback gracefully).

        Returns:
            True only when the probe prints ``PADDLE_GPU_STATUS: SUCCESS``;
            False otherwise, including on any exception (logged, not raised).
        """
        try:
            stdout = self._run_probe(sys.executable, PADDLE_GPU_TEST_SCRIPT)
            return self._stdout_has_line(stdout, "PADDLE_GPU_STATUS: SUCCESS")

        except Exception as e:
            logger.error(f"Paddle GPU test failed: {e}")
            return False

    def create_optimized_classifier(self):
        """Create optimized image classifier with persistent process.

        Writes the classifier source to ``persistent_classifier.py`` in the
        current working directory, overwriting any existing file.

        Returns:
            The path of the written script.
        """
        logger.info("=== CREATING OPTIMIZED IMAGE CLASSIFIER ===")

        # Write persistent classifier script
        script_path = "persistent_classifier.py"
        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(PERSISTENT_CLASSIFIER_SCRIPT)

        logger.info(f"✓ Created persistent classifier: {script_path}")
        return script_path

    def _start_classifier(self, script_path):
        """Launch the persistent classifier with the venv interpreter.

        Returns:
            The ``subprocess.Popen`` handle, or None when the venv interpreter
            does not exist or the process could not be started.
        """
        if not os.path.exists(self.venv_python):
            logger.warning("⚠ OpenCLIP virtual environment not found, image classifier not started")
            return None

        logger.info("Starting persistent image classifier...")
        try:
            return subprocess.Popen(
                [self.venv_python, script_path],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                # Inherit stderr: the child logs there and nothing in this
                # process drains a pipe, so a PIPE could fill up and block it.
                stderr=None,
                text=True,
            )
        except OSError as e:
            logger.error(f"Failed to start image classifier: {e}")
            return None

    @staticmethod
    def _stop_classifier(process):
        """Terminate the classifier process (if any) and close its pipes."""
        if process is None:
            return
        try:
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    # terminate() was ignored; force the process down
                    process.kill()
                    process.wait()
        finally:
            for stream in (process.stdin, process.stdout):
                if stream is not None:
                    try:
                        stream.close()
                    except OSError:
                        # Pipe already broken because the child is gone
                        pass

    def start_server(self):
        """Start the optimized GPU server.

        Probes GPU availability, writes and launches the persistent
        classifier, then runs uvicorn (``main:app``) in the foreground on
        ``self.port``. The classifier is stopped when uvicorn returns.
        """
        logger.info("=== STARTING OPTIMIZED GPU SERVER ===")

        # Test GPU availability
        if not self.test_gpu_availability():
            logger.warning("⚠ Limited GPU availability, some components may use CPU")

        # Create optimized classifier
        classifier_script = self.create_optimized_classifier()

        # Start persistent classifier process (None when it cannot be started)
        classifier_process = self._start_classifier(classifier_script)

        # Start LightRAG server
        logger.info(f"Starting LightRAG server on port {self.port}...")

        try:
            subprocess.run([
                sys.executable, '-m', 'uvicorn', 'main:app',
                '--host', '0.0.0.0',
                '--port', str(self.port),
                '--reload'
            ], check=True)
        except KeyboardInterrupt:
            logger.info("Server stopped by user")
        except Exception as e:
            logger.error(f"Server error: {e}")
        finally:
            # Cleanup
            self._stop_classifier(classifier_process)


def main():
    """Main function to start the optimized GPU server"""
    print("🚀 OPTIMIZED GPU WORKAROUND SERVER")
    print("=" * 50)
    print("This server provides:")
    print("  ✅ GPU acceleration for OpenCLIP image classification")
    print("  ✅ Persistent classifier process for faster inference")
    print("  ✅ Dependency isolation between PaddleOCR and OpenCLIP")
    print("  ✅ Graceful fallback for missing cuDNN")
    print()

    server = GPUWorkaroundServer()
    server.start_server()


if __name__ == "__main__":
    main()