#!/usr/bin/env python3
"""
Comprehensive LightRAG Server Startup Script

Fixes all identified issues with zrun.bat failure:
1. Port 3015 binding conflicts
2. Missing environment variables
3. PyTorch DLL issues (workaround)
4. LLM configuration mismatches
"""

import os
import sys
import subprocess
import time
import signal
import socket
import psutil
from pathlib import Path


def kill_process_on_port(port):
    """Kill any process using the specified port.

    Returns True when the port was confirmed free by a bind test (or a
    cleanup method reported success); False when the port is still in use
    after every cleanup attempt.
    """
    print(f"Checking for processes on port {port}...")
    killed = False
    try:
        # Method 1: run `netstat -ano` and filter in Python.  The original
        # passed a list containing "|" together with shell=True, which is
        # not a reliable way to build a pipeline on Windows.
        result = subprocess.run(
            ["netstat", "-ano"],
            capture_output=True,
            text=True,
        )
        for line in result.stdout.splitlines():
            parts = line.split()
            # TCP rows look like: Proto, Local Address, Foreign Address,
            # State, PID.  Match only the *local* address column so we do
            # not kill processes that merely have a remote peer on `port`.
            if len(parts) >= 5 and parts[1].endswith(f":{port}"):
                try:
                    pid = int(parts[-1])
                    print(f" Found process {pid} using port {port}")
                    proc = psutil.Process(pid)
                    proc.terminate()
                    time.sleep(1)  # give the process a chance to exit cleanly
                    if proc.is_running():
                        proc.kill()
                        print(f" Killed process {pid}")
                    else:
                        print(f" Terminated process {pid}")
                    killed = True
                except (psutil.NoSuchProcess, ValueError, PermissionError) as e:
                    print(f" Could not kill process {parts[-1]}: {e}")
    except Exception as e:
        print(f" Error checking port {port}: {e}")

    # Method 2: PowerShell fallback — ask Windows for the owning process of
    # any connection bound to the port and force-stop it.
    try:
        ps_result = subprocess.run(
            [
                "powershell",
                "-Command",
                f"Get-NetTCPConnection -LocalPort {port} | "
                f"ForEach-Object {{ Stop-Process -Id $_.OwningProcess -Force }}",
            ],
            capture_output=True,
        )
        print(f" Attempted PowerShell port cleanup")
        # Only count this as a kill when PowerShell actually succeeded;
        # the original set killed=True unconditionally.
        if ps_result.returncode == 0:
            killed = True
    except Exception as e:
        print(f" PowerShell cleanup failed: {e}")

    # Method 3: try to bind the port ourselves — the definitive test of
    # whether the cleanup worked.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(('0.0.0.0', port))
        print(f" Port {port} is now available")
        return True
    except OSError:
        print(f" Port {port} is still in use after cleanup attempts")
        return killed
    finally:
        try:
            sock.close()
        except OSError:
            pass


def check_environment_variables():
    """Check all required environment variables.

    Loads key=value pairs from the first .env files found, validates the
    required variables, and wires up fallbacks (LLM_BINDING_API_KEY from
    OPENAI_API_KEY; Ollama embeddings when JINA_API_KEY is absent).

    Returns True when every hard-required variable is present.
    """
    print("Checking environment variables...")

    env_file_locations = [
        Path(".env"),
        Path("LightRAG-main/.env"),
        Path("LightRAG-main/.env.example"),
        Path("../.env"),
    ]
    env_vars_required = {
        "OPENAI_API_KEY": "DeepSeek API key for LLM",
        "JINA_API_KEY": "Jina API key for embeddings (optional but recommended)",
        "LLM_BINDING_API_KEY": "LLM API key (will be set from OPENAI_API_KEY if missing)",
    }

    # Try to load from .env files.  Only fill in variables that are not
    # already set, so the real environment (and earlier, more-specific .env
    # files) take precedence over later ones such as .env.example, whose
    # placeholder values would otherwise clobber real keys.
    for env_file in env_file_locations:
        if env_file.exists():
            print(f" Found .env file at {env_file}")
            try:
                with open(env_file, 'r') as f:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#') and '=' in line:
                            key, value = line.split('=', 1)
                            key = key.strip()
                            if key not in os.environ:
                                os.environ[key] = value.strip()
            except Exception as e:
                print(f" Error reading {env_file}: {e}")

    # Check and set required variables.
    missing_vars = []
    for var, description in env_vars_required.items():
        if var in os.environ and os.environ[var]:
            print(f" ✓ {var}: Set")
            # Ensure LLM_BINDING_API_KEY is set from OPENAI_API_KEY if needed.
            if var == "OPENAI_API_KEY" and "LLM_BINDING_API_KEY" not in os.environ:
                os.environ["LLM_BINDING_API_KEY"] = os.environ[var]
                print(f" Set LLM_BINDING_API_KEY from OPENAI_API_KEY")
        else:
            print(f" ✗ {var}: Missing - {description}")
            missing_vars.append(var)

    # Special handling for JINA_API_KEY - it's optional but recommended.
    if "JINA_API_KEY" in missing_vars:
        print(f" ⚠ JINA_API_KEY is missing - embedding may fail")
        # Fall back to a local Ollama embedding backend.
        os.environ["EMBEDDING_BINDING"] = "ollama"
        print(f" Set EMBEDDING_BINDING=ollama as fallback")
        missing_vars.remove("JINA_API_KEY")

    if missing_vars:
        print(f"\n⚠ Missing required environment variables: {missing_vars}")
        print(" Some features may not work properly.")
        return False
    return True


def check_pytorch_installation():
    """Check if PyTorch is working properly.

    Imports torch and runs a trivial tensor operation.  On failure, sets
    KMP_DUPLICATE_LIB_OK=TRUE as a known workaround for Windows OpenMP /
    DLL conflicts and returns False.
    """
    print("Checking PyTorch installation...")
    try:
        import torch
        print(f" ✓ PyTorch version: {torch.__version__}")

        # Check if CUDA is available (informational only — CPU still works).
        if torch.cuda.is_available():
            print(f" ✓ CUDA is available")
        else:
            print(f" ⚠ CUDA not available - using CPU")

        # Try a simple tensor operation to confirm the native libs load.
        x = torch.tensor([1.0, 2.0, 3.0])
        y = x * 2
        print(f" ✓ Basic tensor operations work")
        return True
    except Exception as e:
        print(f" ✗ PyTorch error: {e}")
        print(f" ⚠ PyTorch may have DLL issues. This may cause spaCy to fail.")
        # Work around duplicate-OpenMP-runtime crashes on Windows.
        os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
        print(f" Set KMP_DUPLICATE_LIB_OK=TRUE to work around DLL issues")
        return False


def start_lightrag_server():
    """Start the LightRAG server with proper configuration.

    Launches the server as a subprocess from the LightRAG-main directory,
    watches its output for 30 seconds for success/failure markers, then —
    when startup looks healthy — blocks on the process until Ctrl+C.

    Returns True on a confirmed successful start, False otherwise.
    """
    print("\nStarting LightRAG server...")

    # Get current directory so we can restore it afterwards.
    current_dir = Path.cwd()
    lightrag_dir = current_dir / "LightRAG-main"

    if not lightrag_dir.exists():
        print(f" ✗ LightRAG directory not found: {lightrag_dir}")
        return False

    # Build the command.
    cmd = [
        sys.executable,  # Use current Python interpreter
        "-m", "lightrag.api.lightrag_server",
        "--port", "3015",
        "--host", "0.0.0.0",
        "--working-dir", "rag_storage",
        "--input-dir", "../inputs",
        "--key", "jleu1212",  # Default API key
        "--auto-scan-at-startup",
        "--llm-binding", "openai",
        "--embedding-binding", "ollama",  # Use Ollama instead of Jina to avoid API key issues
        "--rerank-binding", "jina",
    ]

    # Add environment variables.
    env = os.environ.copy()

    # Ensure LLM configuration uses DeepSeek.
    if "OPENAI_API_KEY" in env:
        env["LLM_BINDING_API_KEY"] = env["OPENAI_API_KEY"]
        # Point the OpenAI-compatible client at DeepSeek's endpoint.
        env["OPENAI_API_BASE"] = "https://api.deepseek.com/v1"
        print(f" Configured DeepSeek API: {env['OPENAI_API_BASE']}")

    print(f" Command: {' '.join(cmd)}")
    print(f" Working directory: {lightrag_dir}")

    try:
        # Change to LightRAG directory and start server.
        os.chdir(lightrag_dir)

        # Start the process.  text=True already implies universal newlines;
        # bufsize=1 gives line buffering in text mode.
        process = subprocess.Popen(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        print(f" Server started with PID: {process.pid}")

        # Monitor output for a few seconds.
        print("\nServer output (first 30 seconds):")
        print("-" * 50)
        start_time = time.time()
        server_started = False

        while time.time() - start_time < 30:  # Monitor for 30 seconds
            # NOTE: readline() blocks until the server emits a line, so the
            # window can stretch past 30s on a silent server.
            line = process.stdout.readline()
            if line:
                print(line.rstrip())
                # Check for success indicators.
                if "Uvicorn running on" in line:
                    server_started = True
                    print("✓ Server started successfully!")
                elif "Errno 10048" in line or "socket address" in line:
                    print("✗ Port binding failed!")
                    process.terminate()
                    return False
                elif "Application startup complete" in line:
                    print("✓ Application started successfully!")

            # Check if process died.
            if process.poll() is not None:
                print(f"✗ Server process died with exit code: {process.returncode}")
                return False

            time.sleep(0.1)

        print("-" * 50)

        if server_started:
            print(f"\n✓ LightRAG server is running on http://localhost:3015")
            print(f" Process PID: {process.pid}")
            print(f" Press Ctrl+C to stop the server")
            # Keep the process running until the user interrupts.
            try:
                process.wait()
            except KeyboardInterrupt:
                print("\nStopping server...")
                process.terminate()
                process.wait()
                print("Server stopped.")
            return True
        else:
            print("\n✗ Server may not have started properly")
            process.terminate()
            return False

    except Exception as e:
        print(f"✗ Error starting server: {e}")
        return False
    finally:
        # Return to original directory.
        os.chdir(current_dir)


def main():
    """Main function to start the LightRAG server with comprehensive fixes."""
    print("=" * 60)
    print("LightRAG Server Startup - Comprehensive Fix")
    print("=" * 60)

    # Step 1: Kill processes on port 3015.
    print("\n[1/4] Port cleanup...")
    if not kill_process_on_port(3015):
        print("⚠ Could not free port 3015. Trying alternative port...")
        # Could implement alternative port logic here

    # Step 2: Check environment variables.
    print("\n[2/4] Environment setup...")
    env_ok = check_environment_variables()

    # Step 3: Check PyTorch.
    print("\n[3/4] Dependency checks...")
    pytorch_ok = check_pytorch_installation()

    if not pytorch_ok:
        print("⚠ PyTorch has issues - entity extraction may fail")
        print(" Consider reinstalling PyTorch or using CPU-only version")

    # Step 4: Start server.
    print("\n[4/4] Starting server...")
    success = start_lightrag_server()

    if success:
        print("\n" + "=" * 60)
        print("SUCCESS: LightRAG server is running!")
        print("Access the Web UI at: http://localhost:3015")
        print("API documentation at: http://localhost:3015/docs")
        print("=" * 60)
        return 0
    else:
        print("\n" + "=" * 60)
        print("FAILURE: Could not start LightRAG server")
        print("\nTroubleshooting steps:")
        print("1. Check if port 3015 is in use: netstat -ano | findstr :3015")
        print("2. Verify .env file has OPENAI_API_KEY set")
        print("3. Check PyTorch installation: python -c 'import torch; print(torch.__version__)'")
        print("4. Try manual start: cd LightRAG-main && python -m lightrag.api.lightrag_server --port 3015")
        print("=" * 60)
        return 1


if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n\nUnexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)