"""
GPU Workaround Server - Uses alternative approach for GPU acceleration

Bypasses cuDNN requirement while maintaining dependency isolation
"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Configure logging
# Module-level logger used throughout GPUWorkaroundServer below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
|
|
|
|
class GPUWorkaroundServer:
    """Server that provides GPU acceleration without cuDNN dependency.

    Strategy: OpenCLIP runs in its own virtual environment
    (``openclip_gpu_env``) so its CUDA-enabled torch build is isolated
    from PaddleOCR's dependencies; PaddleOCR falls back to CPU on its
    own when cuDNN is missing.  Windows paths are assumed throughout
    (backslash venv path, ``C:\\Program Files`` CUDA locations).
    """

    def __init__(self):
        # Port the LightRAG/uvicorn HTTP server listens on.
        self.port = 3015
        # Interpreter of the isolated OpenCLIP venv (Windows layout).
        self.venv_python = "openclip_gpu_env\\Scripts\\python.exe"
        self.setup_environment()

    def setup_environment(self):
        """Set CUDA-related environment variables for this process.

        Picks the first (newest) CUDA toolkit found among the known
        install locations and prepends its ``bin`` directory to PATH so
        child processes (the venv python, uvicorn) inherit it.
        """
        logger.info("=== SETTING UP GPU WORKAROUND ENVIRONMENT ===")

        # Pin all children to the first GPU.
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Candidate CUDA toolkit installs, newest first.
        cuda_paths = [
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0",
            r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
        ]

        for cuda_path in cuda_paths:
            if os.path.exists(cuda_path):
                os.environ['CUDA_PATH'] = cuda_path
                os.environ['CUDA_HOME'] = cuda_path
                logger.info(f"✓ Found CUDA at: {cuda_path}")
                break
        else:  # for/else: no break means no CUDA install was found
            logger.warning("⚠ No CUDA installation found, will use CPU fallback")

        # Prepend CUDA's bin dir to PATH so its DLLs resolve in children.
        if 'CUDA_PATH' in os.environ:
            cuda_bin = os.path.join(os.environ['CUDA_PATH'], 'bin')
            current_path = os.environ.get('PATH', '')
            if cuda_bin not in current_path:
                # os.pathsep is ';' on Windows, the only supported platform here.
                os.environ['PATH'] = cuda_bin + os.pathsep + current_path
                logger.info(f"✓ Added to PATH: {cuda_bin}")

    def test_gpu_availability(self):
        """Probe both GPU backends; return True if either can use the GPU."""
        logger.info("=== TESTING GPU AVAILABILITY ===")

        # Test OpenCLIP GPU in the isolated virtual environment.
        openclip_gpu = self.test_openclip_gpu()

        # Test PaddleOCR GPU (degrades to CPU by itself if cuDNN is missing).
        paddle_gpu = self.test_paddle_gpu()

        logger.info(f"✓ OpenCLIP GPU: {'AVAILABLE' if openclip_gpu else 'UNAVAILABLE'}")
        logger.info(f"✓ PaddleOCR GPU: {'AVAILABLE' if paddle_gpu else 'UNAVAILABLE (using CPU)'}")

        return openclip_gpu or paddle_gpu

    def test_openclip_gpu(self):
        """Check torch.cuda availability inside the OpenCLIP venv.

        Runs a short probe in a subprocess so the venv's torch build
        (not this interpreter's) is the one being tested.  Returns
        False on any failure, including a missing venv or timeout.
        """
        try:
            if not os.path.exists(self.venv_python):
                logger.error("❌ OpenCLIP virtual environment not found")
                return False

            test_script = """
import torch
print(f"OPENCLIP_GPU_AVAILABLE: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"OPENCLIP_GPU_DEVICE_COUNT: {torch.cuda.device_count()}")
    print(f"OPENCLIP_GPU_DEVICE_NAME: {torch.cuda.get_device_name(0)}")
"""
            result = subprocess.run([
                self.venv_python, "-c", test_script
            ], capture_output=True, text=True, timeout=30)

            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "OPENCLIP_GPU_AVAILABLE:" in line and "True" in line:
                        return True
            return False

        except Exception as e:
            # Covers TimeoutExpired and OSError (e.g. broken venv launcher).
            logger.error(f"OpenCLIP GPU test failed: {e}")
            return False

    def test_paddle_gpu(self):
        """Check whether PaddlePaddle can actually select the GPU device.

        Success requires a CUDA build of paddle AND working cuDNN;
        otherwise the probe reports FALLBACK/CPU_ONLY and this method
        returns False.  Uses the current interpreter, not the venv.
        """
        try:
            test_script = """
try:
    import paddle
    print(f"PADDLE_GPU_AVAILABLE: {paddle.is_compiled_with_cuda()}")
    if paddle.is_compiled_with_cuda():
        try:
            paddle.device.set_device('gpu')
            print("PADDLE_GPU_STATUS: SUCCESS")
        except Exception as e:
            print(f"PADDLE_GPU_STATUS: FALLBACK - {e}")
    else:
        print("PADDLE_GPU_STATUS: CPU_ONLY")
except Exception as e:
    print(f"PADDLE_ERROR: {e}")
"""
            result = subprocess.run([
                sys.executable, "-c", test_script
            ], capture_output=True, text=True, timeout=30)

            if result.returncode == 0:
                for line in result.stdout.split('\n'):
                    if "PADDLE_GPU_STATUS: SUCCESS" in line:
                        return True
            return False

        except Exception as e:
            logger.error(f"Paddle GPU test failed: {e}")
            return False

    def create_optimized_classifier(self):
        """Write the persistent OpenCLIP classifier script to disk.

        The generated script loads the model once, then serves
        JSON-line requests over stdin/stdout (actions: ``classify``,
        ``ping``, ``exit``).  Returns the path of the written script.
        """
        logger.info("=== CREATING OPTIMIZED IMAGE CLASSIFIER ===")

        # Create persistent classifier script (executed by the venv python).
        persistent_script = """
import sys
import json
import torch
import open_clip
from PIL import Image
import time
import logging

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class PersistentClassifier:
    def __init__(self):
        self.model = None
        self.processor = None
        self.text_features = None
        self.text_labels = None
        self.load_model()

    def load_model(self):
        \"\"\"Load model once and keep in memory\"\"\"
        logger.info("Loading OpenCLIP model...")
        start_time = time.time()

        # Use smaller model for faster inference
        self.model, _, self.processor = open_clip.create_model_and_transforms(
            model_name="ViT-B-16", # Smaller than ViT-B-32 for speed
            pretrained="laion2b_s34b_b88k"
        )

        # Optimized label set for document processing
        self.text_labels = [
            "a photo of a bee", "a photo of a flower", "a photo of a document",
            "a photo of a chart", "a photo of a diagram", "a photo of a table",
            "a photo of a graph", "a photo of a screenshot", "a photo of a logo",
            "a photo of text", "a photo of a signature", "a photo of a barcode",
            "a photo of a qr code", "a photo of a person", "a photo of a building"
        ]

        # Move to GPU and enable optimizations
        if torch.cuda.is_available():
            self.model = self.model.half().cuda() # FP16 for speed
            logger.info(f"Model loaded on GPU (FP16) in {time.time()-start_time:.2f}s")
        else:
            logger.warning("Using CPU - slower performance")

        # Precompute text features once
        with torch.no_grad():
            text_tokens = open_clip.tokenize(self.text_labels)
            if torch.cuda.is_available():
                text_tokens = text_tokens.cuda()
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)

        logger.info("Model and text features loaded successfully")

    def classify_batch(self, image_paths, top_k=3):
        \"\"\"Classify multiple images efficiently\"\"\"
        results = []

        for image_path in image_paths:
            try:
                # Load and process image
                image = Image.open(image_path).convert("RGB")
                image_tensor = self.processor(image).unsqueeze(0)

                # Move to GPU if available
                if torch.cuda.is_available():
                    image_tensor = image_tensor.half().cuda()

                # Encode image and compute similarity
                with torch.no_grad():
                    image_features = self.model.encode_image(image_tensor)
                    image_features /= image_features.norm(dim=-1, keepdim=True)

                    similarity = (100.0 * image_features @ self.text_features.T).softmax(dim=-1)
                    values, indices = similarity[0].topk(top_k)

                    image_results = []
                    for value, index in zip(values, indices):
                        image_results.append({
                            "label": self.text_labels[index],
                            "confidence": float(value)
                        })

                results.append(image_results)

            except Exception as e:
                logger.error(f"Error processing {image_path}: {e}")
                results.append([{"label": "processing_error", "confidence": 0.0}])

        return results

# Create persistent classifier instance
classifier = PersistentClassifier()

# Main loop for processing requests
while True:
    try:
        # Read input from stdin
        line = sys.stdin.readline().strip()
        if not line:
            continue

        request = json.loads(line)

        if request.get('action') == 'classify':
            image_paths = request['image_paths']
            top_k = request.get('top_k', 3)

            start_time = time.time()
            results = classifier.classify_batch(image_paths, top_k)
            processing_time = time.time() - start_time

            response = {
                'success': True,
                'results': results,
                'processing_time': processing_time,
                'images_processed': len(image_paths)
            }

            print(json.dumps(response))
            sys.stdout.flush()

        elif request.get('action') == 'ping':
            print(json.dumps({'success': True, 'message': 'alive'}))
            sys.stdout.flush()

        elif request.get('action') == 'exit':
            break

    except Exception as e:
        error_response = {
            'success': False,
            'error': str(e)
        }
        print(json.dumps(error_response))
        sys.stdout.flush()
"""

        # Write persistent classifier script.  Explicit encoding avoids
        # locale-dependent codecs on Windows (default is cp1252 there).
        script_path = "persistent_classifier.py"
        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(persistent_script)

        logger.info(f"✓ Created persistent classifier: {script_path}")
        return script_path

    def start_server(self):
        """Start the persistent classifier and the LightRAG uvicorn server.

        Blocks until uvicorn exits (or Ctrl-C); always shuts the
        classifier child process down on the way out.
        """
        logger.info("=== STARTING OPTIMIZED GPU SERVER ===")

        # Probe GPUs first so the log explains any later CPU fallback.
        if not self.test_gpu_availability():
            logger.warning("⚠ Limited GPU availability, some components may use CPU")

        # Create optimized classifier script on disk.
        classifier_script = self.create_optimized_classifier()

        # Start persistent classifier process.  stdin/stdout are piped for
        # the JSON-line request protocol.  stderr is deliberately NOT piped:
        # the child logs to stderr and nothing here drains a stderr pipe, so
        # piping it would eventually fill the OS buffer and block the child.
        logger.info("Starting persistent image classifier...")
        classifier_process = subprocess.Popen([
            self.venv_python, classifier_script
        ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)

        # Start LightRAG server (blocking call).
        logger.info(f"Starting LightRAG server on port {self.port}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'uvicorn', 'main:app',
                '--host', '0.0.0.0', '--port', str(self.port), '--reload'
            ], check=True)
        except KeyboardInterrupt:
            logger.info("Server stopped by user")
        except Exception as e:
            logger.error(f"Server error: {e}")
        finally:
            # Cleanup: ask the classifier to stop; escalate to kill if it
            # does not exit promptly (bounded wait instead of hanging forever).
            if classifier_process.poll() is None:
                classifier_process.terminate()
                try:
                    classifier_process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    classifier_process.kill()
                    classifier_process.wait()
|
|
|
|
def main():
    """Entry point: print the feature banner, then run the workaround server."""
    banner = [
        "🚀 OPTIMIZED GPU WORKAROUND SERVER",
        "=" * 50,
        "This server provides:",
        " ✅ GPU acceleration for OpenCLIP image classification",
        " ✅ Persistent classifier process for faster inference",
        " ✅ Dependency isolation between PaddleOCR and OpenCLIP",
        " ✅ Graceful fallback for missing cuDNN",
        "",
    ]
    for text in banner:
        print(text)

    # Construct the server (sets up CUDA env vars) and block in its loop.
    GPUWorkaroundServer().start_server()
|
|
|
|
# Run the server only when executed as a script (not on import).
if __name__ == "__main__":
    main()