Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes
This commit is contained in:
348
start_server_comprehensive.py
Normal file
348
start_server_comprehensive.py
Normal file
@@ -0,0 +1,348 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive LightRAG Server Startup Script
|
||||
Fixes all identified issues with zrun.bat failure:
|
||||
1. Port 3015 binding conflicts
|
||||
2. Missing environment variables
|
||||
3. PyTorch DLL issues (workaround)
|
||||
4. LLM configuration mismatches
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import signal
|
||||
import socket
|
||||
import psutil
|
||||
from pathlib import Path
|
||||
|
||||
def kill_process_on_port(port):
    """Free up *port* by terminating whatever process is bound to it.

    Tries three strategies in order:
      1. Parse ``netstat`` output for the owning PID and terminate it via
         psutil (graceful terminate, then hard kill after a 1s grace period).
      2. A PowerShell ``Get-NetTCPConnection``/``Stop-Process`` sweep as a
         catch-all (Windows only).
      3. Bind to the port ourselves — the only authoritative free-port check.

    Args:
        port: TCP port number to free.

    Returns:
        True if the port is confirmed free (bind succeeded); otherwise
        whether any kill attempt appeared to succeed.
    """
    print(f"Checking for processes on port {port}...")
    killed = False

    try:
        # Method 1: use netstat to find the owning PID.
        # BUGFIX: with shell=True the command must be a single string —
        # passing a list with a "|" element only worked by accident of
        # Windows argument joining and breaks on POSIX shells.
        result = subprocess.run(
            f"netstat -ano | findstr :{port}",
            shell=True,
            capture_output=True,
            text=True
        )

        if result.stdout:
            for line in result.stdout.strip().split('\n'):
                if f":{port}" in line:
                    parts = line.strip().split()
                    # Require the full 5-column TCP row so we never try to
                    # parse a header line; the PID is the last column.
                    if len(parts) >= 5:
                        pid = parts[-1]
                        try:
                            pid = int(pid)
                            print(f" Found process {pid} using port {port}")
                            proc = psutil.Process(pid)
                            proc.terminate()
                            time.sleep(1)  # grace period before escalating
                            if proc.is_running():
                                proc.kill()
                                print(f" Killed process {pid}")
                            else:
                                print(f" Terminated process {pid}")
                            killed = True
                        except (psutil.NoSuchProcess, ValueError, PermissionError) as e:
                            print(f" Could not kill process {pid}: {e}")

    except Exception as e:
        print(f" Error checking port {port}: {e}")

    # Method 2: PowerShell sweep — stops every owner of the port in one go.
    try:
        subprocess.run(
            f"powershell -Command \"Get-NetTCPConnection -LocalPort {port} | ForEach-Object {{ Stop-Process -Id $_.OwningProcess -Force }}\"",
            shell=True,
            capture_output=True
        )
        print(f" Attempted PowerShell port cleanup")
        killed = True
    except Exception as e:
        print(f" PowerShell cleanup failed: {e}")

    # Method 3: binding is the authoritative test that the port is free.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(('0.0.0.0', port))
        sock.close()
        print(f" Port {port} is now available")
        return True
    except OSError:
        print(f" Port {port} is still in use after cleanup attempts")
        return killed
    finally:
        # BUGFIX: narrowed the bare `except:` — only a close() on an
        # already-closed socket is expected here.
        try:
            sock.close()
        except OSError:
            pass
|
||||
|
||||
def check_environment_variables():
    """Load .env files and verify the required API-key variables.

    Searches several candidate .env locations, loads KEY=VALUE pairs into
    ``os.environ`` (stripping optional surrounding quotes), then reports on
    each required variable.  ``LLM_BINDING_API_KEY`` is derived from
    ``OPENAI_API_KEY`` when absent, and a missing ``JINA_API_KEY`` downgrades
    the embedding binding to Ollama instead of counting as a hard failure.

    Returns:
        True when every hard-required variable is present, else False.
    """
    print("Checking environment variables...")

    env_file_locations = [
        Path(".env"),
        Path("LightRAG-main/.env"),
        Path("LightRAG-main/.env.example"),
        Path("../.env")
    ]

    env_vars_required = {
        "OPENAI_API_KEY": "DeepSeek API key for LLM",
        "JINA_API_KEY": "Jina API key for embeddings (optional but recommended)",
        "LLM_BINDING_API_KEY": "LLM API key (will be set from OPENAI_API_KEY if missing)"
    }

    # Load KEY=VALUE pairs from every .env file found (later files win).
    for env_file in env_file_locations:
        if env_file.exists():
            print(f" Found .env file at {env_file}")
            try:
                with open(env_file, 'r') as f:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#') and '=' in line:
                            key, value = line.split('=', 1)
                            value = value.strip()
                            # BUGFIX: dotenv files commonly quote values
                            # (KEY="x" or KEY='x'); the quotes are not part
                            # of the value and would corrupt API keys.
                            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                                value = value[1:-1]
                            os.environ[key.strip()] = value
            except Exception as e:
                print(f" Error reading {env_file}: {e}")

    # Check and set required variables
    missing_vars = []
    for var, description in env_vars_required.items():
        if var in os.environ and os.environ[var]:
            print(f" ✓ {var}: Set")

            # Derive LLM_BINDING_API_KEY from OPENAI_API_KEY when absent.
            # Relies on OPENAI_API_KEY being listed first (dict order).
            if var == "OPENAI_API_KEY" and "LLM_BINDING_API_KEY" not in os.environ:
                os.environ["LLM_BINDING_API_KEY"] = os.environ[var]
                print(f" Set LLM_BINDING_API_KEY from OPENAI_API_KEY")
        else:
            print(f" ✗ {var}: Missing - {description}")
            missing_vars.append(var)

    # JINA_API_KEY is optional: fall back to a local Ollama embedder instead
    # of treating it as a hard failure.
    if "JINA_API_KEY" in missing_vars:
        print(f" ⚠ JINA_API_KEY is missing - embedding may fail")
        os.environ["EMBEDDING_BINDING"] = "ollama"
        print(f" Set EMBEDDING_BINDING=ollama as fallback")
        missing_vars.remove("JINA_API_KEY")

    if missing_vars:
        print(f"\n⚠ Missing required environment variables: {missing_vars}")
        print(" Some features may not work properly.")
        return False

    return True
|
||||
|
||||
def check_pytorch_installation():
    """Probe the local PyTorch install and report its health.

    Imports torch, reports the version and CUDA availability, then runs a
    tiny tensor computation as a smoke test.  On any failure the
    ``KMP_DUPLICATE_LIB_OK`` workaround for duplicate-OpenMP DLL crashes is
    enabled before returning.

    Returns:
        True when torch imports and basic tensor math works, else False.
    """
    print("Checking PyTorch installation...")

    try:
        import torch
        print(f" ✓ PyTorch version: {torch.__version__}")

        # Report which compute backend will actually be used.
        cuda_ok = torch.cuda.is_available()
        if cuda_ok:
            print(f" ✓ CUDA is available")
        else:
            print(f" ⚠ CUDA not available - using CPU")

        # Smoke test: a trivial elementwise op exercises the core DLLs.
        sample = torch.tensor([1.0, 2.0, 3.0])
        _ = sample * 2
        print(f" ✓ Basic tensor operations work")

        return True
    except Exception as e:
        print(f" ✗ PyTorch error: {e}")
        print(f" ⚠ PyTorch may have DLL issues. This may cause spaCy to fail.")

        # Known workaround for the libiomp5 "duplicate library" abort —
        # presumably the DLL clash the module docstring refers to.
        os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
        print(f" Set KMP_DUPLICATE_LIB_OK=TRUE to work around DLL issues")

        return False
|
||||
|
||||
def start_lightrag_server():
    """Launch the LightRAG API server and supervise its first 30 seconds.

    Runs ``lightrag.api.lightrag_server`` from the LightRAG-main directory
    with a DeepSeek-backed OpenAI binding and an Ollama embedder, echoing
    the child's merged stdout/stderr while watching for startup/failure
    markers.  After a confirmed start, blocks until the server exits or the
    user presses Ctrl+C.

    Returns:
        True if the server started (and then exited via wait/Ctrl+C),
        False on any startup failure.
    """
    print("\nStarting LightRAG server...")

    # Remember where we were so the finally block can restore the cwd.
    current_dir = Path.cwd()
    lightrag_dir = current_dir / "LightRAG-main"

    if not lightrag_dir.exists():
        print(f" ✗ LightRAG directory not found: {lightrag_dir}")
        return False

    # Build the command
    cmd = [
        sys.executable,  # Use current Python interpreter
        "-m", "lightrag.api.lightrag_server",
        "--port", "3015",
        "--host", "0.0.0.0",
        "--working-dir", "rag_storage",
        "--input-dir", "../inputs",
        "--key", "jleu1212",  # Default API key
        "--auto-scan-at-startup",
        "--llm-binding", "openai",
        "--embedding-binding", "ollama",  # Use Ollama instead of Jina to avoid API key issues
        "--rerank-binding", "jina"
    ]

    # Pass a copy so the child's environment tweaks don't mutate our own.
    env = os.environ.copy()

    # Route the OpenAI-compatible binding at DeepSeek's endpoint.
    if "OPENAI_API_KEY" in env:
        env["LLM_BINDING_API_KEY"] = env["OPENAI_API_KEY"]
        env["OPENAI_API_BASE"] = "https://api.deepseek.com/v1"
        print(f" Configured DeepSeek API: {env['OPENAI_API_BASE']}")

    print(f" Command: {' '.join(cmd)}")
    print(f" Working directory: {lightrag_dir}")

    try:
        # The server resolves working-dir/input-dir relative to its tree.
        os.chdir(lightrag_dir)

        # Merge stderr into stdout so one reader sees everything; line-buffered.
        process = subprocess.Popen(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            universal_newlines=True
        )

        print(f" Server started with PID: {process.pid}")

        # Monitor output for a few seconds
        print("\nServer output (first 30 seconds):")
        print("-" * 50)

        start_time = time.time()
        server_started = False

        while time.time() - start_time < 30:  # Monitor for 30 seconds
            line = process.stdout.readline()
            if line:
                print(line.rstrip())

                # Check for success indicators
                if "Uvicorn running on" in line:
                    server_started = True
                    print("✓ Server started successfully!")
                elif "Errno 10048" in line or "socket address" in line:
                    print("✗ Port binding failed!")
                    process.terminate()
                    return False
                elif "Application startup complete" in line:
                    # BUGFIX: this marker also proves a successful start; the
                    # original left server_started False here, so a server
                    # that only emitted this line was terminated as a failure.
                    server_started = True
                    print("✓ Application started successfully!")
            else:
                # BUGFIX: only back off when the pipe had nothing for us —
                # sleeping after every line throttled reading to ~10 lines/s.
                time.sleep(0.1)

            # Check if process died
            if process.poll() is not None:
                print(f"✗ Server process died with exit code: {process.returncode}")
                return False

        print("-" * 50)

        if server_started:
            print(f"\n✓ LightRAG server is running on http://localhost:3015")
            print(f" Process PID: {process.pid}")
            print(f" Press Ctrl+C to stop the server")

            # Hand control to the server; Ctrl+C shuts it down gracefully.
            try:
                process.wait()
            except KeyboardInterrupt:
                print("\nStopping server...")
                process.terminate()
                process.wait()
                print("Server stopped.")

            return True
        else:
            print("\n✗ Server may not have started properly")
            process.terminate()
            return False

    except Exception as e:
        print(f"✗ Error starting server: {e}")
        return False
    finally:
        # Return to original directory
        os.chdir(current_dir)
|
||||
|
||||
def main():
    """Run the four startup phases: port cleanup, environment, deps, server.

    Returns:
        Process exit code — 0 when the server ran, 1 on failure.
    """
    banner = "=" * 60
    print(banner)
    print("LightRAG Server Startup - Comprehensive Fix")
    print(banner)

    # Phase 1: make sure nothing is squatting on the server port.
    print("\n[1/4] Port cleanup...")
    if not kill_process_on_port(3015):
        print("⚠ Could not free port 3015. Trying alternative port...")
        # Could implement alternative port logic here

    # Phase 2: load .env files and validate API keys.
    print("\n[2/4] Environment setup...")
    env_ok = check_environment_variables()

    # Phase 3: verify PyTorch before the server tries to import it.
    print("\n[3/4] Dependency checks...")
    if not check_pytorch_installation():
        print("⚠ PyTorch has issues - entity extraction may fail")
        print(" Consider reinstalling PyTorch or using CPU-only version")

    # Phase 4: launch and supervise the server.
    print("\n[4/4] Starting server...")
    if start_lightrag_server():
        print("\n" + banner)
        print("SUCCESS: LightRAG server is running!")
        print("Access the Web UI at: http://localhost:3015")
        print("API documentation at: http://localhost:3015/docs")
        print(banner)
        return 0

    print("\n" + banner)
    print("FAILURE: Could not start LightRAG server")
    print("\nTroubleshooting steps:")
    print("1. Check if port 3015 is in use: netstat -ano | findstr :3015")
    print("2. Verify .env file has OPENAI_API_KEY set")
    print("3. Check PyTorch installation: python -c 'import torch; print(torch.__version__)'")
    print("4. Try manual start: cd LightRAG-main && python -m lightrag.api.lightrag_server --port 3015")
    print(banner)
    return 1
|
||||
|
||||
# Script entry point: translate main()'s result (and any uncaught failure)
# into a conventional process exit code.
if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT, the conventional exit code for Ctrl+C.
        print("\n\nInterrupted by user")
        sys.exit(130)
    except Exception as e:
        # Last-resort handler: show the full traceback so startup failures
        # are diagnosable, then exit non-zero.
        print(f"\n\nUnexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
Reference in New Issue
Block a user