#!/usr/bin/env python3
"""
Comprehensive LightRAG Server Startup Script

Fixes all identified issues with zrun.bat failure:
1. Port 3015 binding conflicts
2. Missing environment variables
3. PyTorch DLL issues (workaround)
4. LLM configuration mismatches
"""

import os
import sys
import subprocess
import time
import signal
import socket
import psutil
from pathlib import Path


def kill_process_on_port(port):
    """Kill any process using the specified port.

    Returns True when the port was confirmed free by a bind test (or a
    cleanup method reported success); False when the port is still in use
    after every cleanup attempt.
    """
    print(f"Checking for processes on port {port}...")
    killed = False
    try:
        # Method 1: run `netstat -ano` and filter in Python.  The original
        # passed a list containing "|" together with shell=True, which is
        # not a reliable way to build a pipeline on Windows.
        result = subprocess.run(
            ["netstat", "-ano"],
            capture_output=True,
            text=True,
        )
        for line in result.stdout.splitlines():
            parts = line.split()
            # TCP rows look like: Proto, Local Address, Foreign Address,
            # State, PID.  Match only the *local* address column so we do
            # not kill processes that merely have a remote peer on `port`.
            if len(parts) >= 5 and parts[1].endswith(f":{port}"):
                try:
                    pid = int(parts[-1])
                    print(f" Found process {pid} using port {port}")
                    proc = psutil.Process(pid)
                    proc.terminate()
                    time.sleep(1)  # give the process a chance to exit cleanly
                    if proc.is_running():
                        proc.kill()
                        print(f" Killed process {pid}")
                    else:
                        print(f" Terminated process {pid}")
                    killed = True
                except (psutil.NoSuchProcess, ValueError, PermissionError) as e:
                    print(f" Could not kill process {parts[-1]}: {e}")
    except Exception as e:
        print(f" Error checking port {port}: {e}")

    # Method 2: PowerShell fallback — ask Windows for the owning process of
    # any connection bound to the port and force-stop it.
    try:
        ps_result = subprocess.run(
            [
                "powershell",
                "-Command",
                f"Get-NetTCPConnection -LocalPort {port} | "
                f"ForEach-Object {{ Stop-Process -Id $_.OwningProcess -Force }}",
            ],
            capture_output=True,
        )
        print(f" Attempted PowerShell port cleanup")
        # Only count this as a kill when PowerShell actually succeeded;
        # the original set killed=True unconditionally.
        if ps_result.returncode == 0:
            killed = True
    except Exception as e:
        print(f" PowerShell cleanup failed: {e}")

    # Method 3: try to bind the port ourselves — the definitive test of
    # whether the cleanup worked.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(('0.0.0.0', port))
        print(f" Port {port} is now available")
        return True
    except OSError:
        print(f" Port {port} is still in use after cleanup attempts")
        return killed
    finally:
        try:
            sock.close()
        except OSError:
            pass


def check_environment_variables():
    """Check all required environment variables.

    Loads key=value pairs from the first .env files found, validates the
    required variables, and wires up fallbacks (LLM_BINDING_API_KEY from
    OPENAI_API_KEY; Ollama embeddings when JINA_API_KEY is absent).

    Returns True when every hard-required variable is present.
    """
    print("Checking environment variables...")

    env_file_locations = [
        Path(".env"),
        Path("LightRAG-main/.env"),
        Path("LightRAG-main/.env.example"),
        Path("../.env"),
    ]
    env_vars_required = {
        "OPENAI_API_KEY": "DeepSeek API key for LLM",
        "JINA_API_KEY": "Jina API key for embeddings (optional but recommended)",
        "LLM_BINDING_API_KEY": "LLM API key (will be set from OPENAI_API_KEY if missing)",
    }

    # Try to load from .env files.  Only fill in variables that are not
    # already set, so the real environment (and earlier, more-specific .env
    # files) take precedence over later ones such as .env.example, whose
    # placeholder values would otherwise clobber real keys.
    for env_file in env_file_locations:
        if env_file.exists():
            print(f" Found .env file at {env_file}")
            try:
                with open(env_file, 'r') as f:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#') and '=' in line:
                            key, value = line.split('=', 1)
                            key = key.strip()
                            if key not in os.environ:
                                os.environ[key] = value.strip()
            except Exception as e:
                print(f" Error reading {env_file}: {e}")

    # Check and set required variables.
    missing_vars = []
    for var, description in env_vars_required.items():
        if var in os.environ and os.environ[var]:
            print(f" ✓ {var}: Set")
            # Ensure LLM_BINDING_API_KEY is set from OPENAI_API_KEY if needed.
            if var == "OPENAI_API_KEY" and "LLM_BINDING_API_KEY" not in os.environ:
                os.environ["LLM_BINDING_API_KEY"] = os.environ[var]
                print(f" Set LLM_BINDING_API_KEY from OPENAI_API_KEY")
        else:
            print(f" ✗ {var}: Missing - {description}")
            missing_vars.append(var)

    # Special handling for JINA_API_KEY - it's optional but recommended.
    if "JINA_API_KEY" in missing_vars:
        print(f" ⚠ JINA_API_KEY is missing - embedding may fail")
        # Fall back to a local Ollama embedding backend.
        os.environ["EMBEDDING_BINDING"] = "ollama"
        print(f" Set EMBEDDING_BINDING=ollama as fallback")
        missing_vars.remove("JINA_API_KEY")

    if missing_vars:
        print(f"\n⚠ Missing required environment variables: {missing_vars}")
        print(" Some features may not work properly.")
        return False
    return True


def check_pytorch_installation():
    """Check if PyTorch is working properly.

    Imports torch and runs a trivial tensor operation.  On failure, sets
    KMP_DUPLICATE_LIB_OK=TRUE as a known workaround for Windows OpenMP /
    DLL conflicts and returns False.
    """
    print("Checking PyTorch installation...")
    try:
        import torch
        print(f" ✓ PyTorch version: {torch.__version__}")

        # Check if CUDA is available (informational only — CPU still works).
        if torch.cuda.is_available():
            print(f" ✓ CUDA is available")
        else:
            print(f" ⚠ CUDA not available - using CPU")

        # Try a simple tensor operation to confirm the native libs load.
        x = torch.tensor([1.0, 2.0, 3.0])
        y = x * 2
        print(f" ✓ Basic tensor operations work")
        return True
    except Exception as e:
        print(f" ✗ PyTorch error: {e}")
        print(f" ⚠ PyTorch may have DLL issues. This may cause spaCy to fail.")
        # Work around duplicate-OpenMP-runtime crashes on Windows.
        os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
        print(f" Set KMP_DUPLICATE_LIB_OK=TRUE to work around DLL issues")
        return False


def start_lightrag_server():
    """Start the LightRAG server with proper configuration.

    Launches the server as a subprocess from the LightRAG-main directory,
    watches its output for 30 seconds for success/failure markers, then —
    when startup looks healthy — blocks on the process until Ctrl+C.

    Returns True on a confirmed successful start, False otherwise.
    """
    print("\nStarting LightRAG server...")

    # Get current directory so we can restore it afterwards.
    current_dir = Path.cwd()
    lightrag_dir = current_dir / "LightRAG-main"

    if not lightrag_dir.exists():
        print(f" ✗ LightRAG directory not found: {lightrag_dir}")
        return False

    # Build the command.
    cmd = [
        sys.executable,  # Use current Python interpreter
        "-m", "lightrag.api.lightrag_server",
        "--port", "3015",
        "--host", "0.0.0.0",
        "--working-dir", "rag_storage",
        "--input-dir", "../inputs",
        "--key", "jleu1212",  # Default API key
        "--auto-scan-at-startup",
        "--llm-binding", "openai",
        "--embedding-binding", "ollama",  # Use Ollama instead of Jina to avoid API key issues
        "--rerank-binding", "jina",
    ]

    # Add environment variables.
    env = os.environ.copy()

    # Ensure LLM configuration uses DeepSeek.
    if "OPENAI_API_KEY" in env:
        env["LLM_BINDING_API_KEY"] = env["OPENAI_API_KEY"]
        # Point the OpenAI-compatible client at DeepSeek's endpoint.
        env["OPENAI_API_BASE"] = "https://api.deepseek.com/v1"
        print(f" Configured DeepSeek API: {env['OPENAI_API_BASE']}")

    print(f" Command: {' '.join(cmd)}")
    print(f" Working directory: {lightrag_dir}")

    try:
        # Change to LightRAG directory and start server.
        os.chdir(lightrag_dir)

        # Start the process.  text=True already implies universal newlines;
        # bufsize=1 gives line buffering in text mode.
        process = subprocess.Popen(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        print(f" Server started with PID: {process.pid}")

        # Monitor output for a few seconds.
        print("\nServer output (first 30 seconds):")
        print("-" * 50)
        start_time = time.time()
        server_started = False

        while time.time() - start_time < 30:  # Monitor for 30 seconds
            # NOTE: readline() blocks until the server emits a line, so the
            # window can stretch past 30s on a silent server.
            line = process.stdout.readline()
            if line:
                print(line.rstrip())
                # Check for success indicators.
                if "Uvicorn running on" in line:
                    server_started = True
                    print("✓ Server started successfully!")
                elif "Errno 10048" in line or "socket address" in line:
                    print("✗ Port binding failed!")
                    process.terminate()
                    return False
                elif "Application startup complete" in line:
                    print("✓ Application started successfully!")

            # Check if process died.
            if process.poll() is not None:
                print(f"✗ Server process died with exit code: {process.returncode}")
                return False

            time.sleep(0.1)

        print("-" * 50)

        if server_started:
            print(f"\n✓ LightRAG server is running on http://localhost:3015")
            print(f" Process PID: {process.pid}")
            print(f" Press Ctrl+C to stop the server")
            # Keep the process running until the user interrupts.
            try:
                process.wait()
            except KeyboardInterrupt:
                print("\nStopping server...")
                process.terminate()
                process.wait()
                print("Server stopped.")
            return True
        else:
            print("\n✗ Server may not have started properly")
            process.terminate()
            return False

    except Exception as e:
        print(f"✗ Error starting server: {e}")
        return False
    finally:
        # Return to original directory.
        os.chdir(current_dir)


def main():
    """Main function to start the LightRAG server with comprehensive fixes."""
    print("=" * 60)
    print("LightRAG Server Startup - Comprehensive Fix")
    print("=" * 60)

    # Step 1: Kill processes on port 3015.
    print("\n[1/4] Port cleanup...")
    if not kill_process_on_port(3015):
        print("⚠ Could not free port 3015. Trying alternative port...")
        # Could implement alternative port logic here

    # Step 2: Check environment variables.
    print("\n[2/4] Environment setup...")
    env_ok = check_environment_variables()

    # Step 3: Check PyTorch.
    print("\n[3/4] Dependency checks...")
    pytorch_ok = check_pytorch_installation()

    if not pytorch_ok:
        print("⚠ PyTorch has issues - entity extraction may fail")
        print(" Consider reinstalling PyTorch or using CPU-only version")

    # Step 4: Start server.
    print("\n[4/4] Starting server...")
    success = start_lightrag_server()

    if success:
        print("\n" + "=" * 60)
        print("SUCCESS: LightRAG server is running!")
        print("Access the Web UI at: http://localhost:3015")
        print("API documentation at: http://localhost:3015/docs")
        print("=" * 60)
        return 0
    else:
        print("\n" + "=" * 60)
        print("FAILURE: Could not start LightRAG server")
        print("\nTroubleshooting steps:")
        print("1. Check if port 3015 is in use: netstat -ano | findstr :3015")
        print("2. Verify .env file has OPENAI_API_KEY set")
        print("3. Check PyTorch installation: python -c 'import torch; print(torch.__version__)'")
        print("4. Try manual start: cd LightRAG-main && python -m lightrag.api.lightrag_server --port 3015")
        print("=" * 60)
        return 1


if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n\nUnexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)