Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes
This commit is contained in:
273
start_server_fixed_improved.py
Normal file
273
start_server_fixed_improved.py
Normal file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Improved server starter that fixes all identified issues:
|
||||
1. Better port conflict handling
|
||||
2. Proper LLM configuration loading
|
||||
3. Correct .env file path handling
|
||||
4. Better error reporting
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import socket
|
||||
import signal
|
||||
|
||||
def kill_process_on_port(port):
    """Terminate the process that holds ``port``, if one can be found.

    Two strategies are tried in order:

    1. ``psutil`` (when importable): scan every process's inet connections
       and terminate the first one whose local port equals ``port``.
    2. Windows only: parse ``netstat -ano`` and ``taskkill`` every PID that
       owns a TCP socket whose *local* port equals ``port``.

    Args:
        port: TCP port number to free.

    Returns:
        bool: True if at least one process was terminated, False otherwise.
    """
    port_text = str(port)

    # psutil is optional; fall through to netstat when it is not installed.
    try:
        import psutil
    except ImportError:
        psutil = None

    if psutil is not None:
        try:
            for proc in psutil.process_iter(['pid', 'name']):
                try:
                    # Process.connections() was renamed to net_connections()
                    # in newer psutil releases; support both spellings.
                    list_connections = getattr(proc, 'net_connections', None) or proc.connections
                    for conn in list_connections(kind='inet'):
                        # Unbound sockets can report an empty local address.
                        if not conn.laddr:
                            continue
                        if str(conn.laddr.port) != port_text:
                            continue
                        print(f"Found process {proc.pid} ({proc.name()}) using port {port}")
                        proc.terminate()
                        try:
                            proc.wait(timeout=5)
                        except psutil.TimeoutExpired:
                            # Graceful termination was ignored: force it rather
                            # than abandoning the whole scan.
                            proc.kill()
                            proc.wait(timeout=5)
                        print(f"Terminated process {proc.pid}")
                        return True
                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    # Process vanished or is not inspectable; try the next one.
                    continue
        except Exception as e:
            print(f"psutil method failed: {e}")

    # netstat -ano / taskkill are Windows tools; there is no fallback elsewhere.
    if os.name != 'nt':
        return False

    try:
        listing = subprocess.run(
            ['netstat', '-ano'],
            capture_output=True,
            text=True,
            errors='replace',
        )

        # Collect owning PIDs, matching the local address column exactly so
        # that ":3015" does not also match ":30150" or a remote endpoint.
        pids = []
        for line in (listing.stdout or '').splitlines():
            parts = line.split()
            # TCP rows have 5 columns: proto, local, remote, state, pid.
            if len(parts) < 5:
                continue
            local_port = parts[1].rsplit(':', 1)[-1]
            pid = parts[-1]
            if local_port != port_text:
                continue
            # PID 0 owns TIME_WAIT entries and cannot be killed.
            if not pid.isdigit() or pid == '0':
                continue
            if pid not in pids:
                pids.append(pid)

        killed_any = False
        for pid in pids:
            print(f"Found process {pid} using port {port}")
            outcome = subprocess.run(
                ['taskkill', '/F', '/PID', pid],
                capture_output=True,
                text=True,
                errors='replace',
            )
            if outcome.returncode == 0:
                print(f"Killed process {pid}")
                killed_any = True
            else:
                detail = (outcome.stderr or outcome.stdout or '').strip()
                print(f"Failed to kill process {pid}: {detail}")
        return killed_any
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        print(f"netstat method failed: {e}")

    return False
|
||||
|
||||
def is_port_in_use(port, host='0.0.0.0'):
    """Report whether a TCP socket can NOT be bound to ``host:port``.

    The check works by attempting to bind a fresh socket; the socket is
    closed again before returning, so the port is not held by this call.

    Args:
        port: TCP port number to probe.
        host: Local address to probe. Defaults to ``'0.0.0.0'`` (all IPv4
            interfaces), which is what the function always used before the
            parameter existed.

    Returns:
        bool: True if binding failed with an OS-level socket error (the port
        is treated as in use), False if the bind succeeded.
    """
    # An address containing ':' is an IPv6 literal and needs an AF_INET6 socket.
    family = socket.AF_INET6 if ':' in host else socket.AF_INET
    with socket.socket(family, socket.SOCK_STREAM) as probe:
        try:
            probe.bind((host, port))
        except OSError:  # socket.error is an alias of OSError
            return True
    return False
|
||||
|
||||
def load_env_file(env_path):
    """Parse a ``.env`` file into a dict of string keys and string values.

    Parsing rules:
      * blank lines and lines starting with ``#`` are skipped;
      * an optional leading ``export `` is dropped;
      * each remaining line is split on the first ``=``; lines without ``=``
        or with an empty key are ignored;
      * whitespace around key and value is stripped, and one pair of matching
        surrounding quotes (``"..."`` or ``'...'``) is removed from the value;
      * a UTF-8 byte-order mark at the start of the file is ignored.

    Args:
        env_path: Path of the file to read.

    Returns:
        dict: The parsed variables, or an empty dict when the file is missing
        or cannot be read (a message is printed in either case).
    """
    config = {}
    try:
        # utf-8-sig transparently drops a BOM, which would otherwise become
        # part of the first key.
        with open(env_path, 'r', encoding='utf-8-sig') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue
                if line.startswith('export '):
                    line = line[len('export '):].lstrip()

                key, separator, value = line.partition('=')
                key = key.strip()
                if not separator or not key:
                    continue

                value = value.strip()
                if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                    value = value[1:-1]
                config[key] = value
        print(f"Loaded {len(config)} configuration variables from {env_path}")
        return config
    except FileNotFoundError:
        print(f"Warning: .env file not found at {env_path}")
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError from undecodable bytes.
        print(f"Error reading .env file: {e}")
        return {}
|
||||
|
||||
def _apply_llm_defaults(config):
    """Fill in missing LLM settings in ``config`` (in place).

    Returns:
        bool: True when ``LLM_BINDING_API_KEY`` is available afterwards.
    """
    if 'LLM_BINDING_HOST' not in config:
        config['LLM_BINDING_HOST'] = 'https://api.deepseek.com/v1'
        print("Warning: LLM_BINDING_HOST not set, defaulting to DeepSeek API")

    config.setdefault('OPENAI_API_BASE', config['LLM_BINDING_HOST'])
    config.setdefault('LLM_MODEL', 'deepseek-chat')

    # Accept a binding key exported in the process environment instead of
    # insisting that it lives in the .env file. OPENAI_API_KEY is deliberately
    # NOT picked up from the environment, so a globally exported OpenAI key is
    # never forwarded to a different provider.
    if 'LLM_BINDING_API_KEY' not in config and os.environ.get('LLM_BINDING_API_KEY'):
        config['LLM_BINDING_API_KEY'] = os.environ['LLM_BINDING_API_KEY']

    # CRITICAL FIX: mirror OPENAI_API_KEY into LLM_BINDING_API_KEY if absent.
    if 'LLM_BINDING_API_KEY' not in config and 'OPENAI_API_KEY' in config:
        config['LLM_BINDING_API_KEY'] = config['OPENAI_API_KEY']
        print("Info: Set LLM_BINDING_API_KEY from OPENAI_API_KEY")

    return 'LLM_BINDING_API_KEY' in config


def _export_settings(config, env):
    """Copy the known LLM / embedding / rerank settings from ``config`` to ``env``."""
    llm_keys = (
        'LLM_BINDING_HOST',
        'LLM_BINDING_API_KEY',
        'LLM_MODEL',
        'OPENAI_API_KEY',
        'OPENAI_API_BASE',
        'ENABLE_LLM_CACHE',
        'ENABLE_LLM_CACHE_FOR_EXTRACT',
        'TIMEOUT',
        'TEMPERATURE',
        'MAX_ASYNC',
        'MAX_TOKENS',
        'OPTIMIZE_ENTITY_EXTRACTION',
    )
    child_only_keys = (
        # embedding-related
        'EMBEDDING_MODEL',
        'EMBEDDING_DIM',
        'EMBEDDING_BINDING_HOST',
        'EMBEDDING_BATCH_NUM',
        'EMBEDDING_FUNC_MAX_ASYNC',
        # rerank-related
        'RERANK_MODEL',
    )

    for key in llm_keys:
        if key in config:
            env[key] = config[key]
            # LLM settings are also exported into the current process.
            os.environ[key] = config[key]

    for key in child_only_keys:
        if key in config:
            env[key] = config[key]


def _relay_server_output(process, port):
    """Echo the child's output line by line until it ends.

    Returns:
        bool: True as soon as a line reports that the port could not be
        bound, False when the output stream ends without such a line.
    """
    for line in iter(process.stdout.readline, ''):
        # Replace non-ASCII characters with '?' so printing cannot fail on
        # consoles with a limited code page.
        printable = line.encode('ascii', 'replace').decode('ascii')
        print(printable.rstrip())

        if "Errno 10048" in line or "address already in use" in line.lower():
            print("\nERROR: Port binding failed. Another process may be using the port.")
            print(f"Try running 'netstat -ano | findstr :{port}' to find the process.")
            return True
    return False


def main():
    """Start the LightRAG server with all fixes applied.

    Steps: load the first non-empty ``.env`` (current directory, then
    ``LightRAG-main/``), fill in LLM defaults, free the configured port if it
    is busy, then run ``lightrag.api.lightrag_server`` from ``LightRAG-main``
    and relay its output.

    Returns:
        int: 0 when the server exits cleanly or is stopped by the user,
        a non-zero value when it cannot be started, cannot bind its port,
        or exits with an error.

    Raises:
        SystemExit: with status 1 when no LLM API key is configured.
    """
    print("Starting LightRAG server with improved configuration...")

    # Force UTF-8 I/O in the child process.
    env = os.environ.copy()
    env['PYTHONIOENCODING'] = 'utf-8'
    env['PYTHONUTF8'] = '1'

    # First try the current directory, then the LightRAG-main directory.
    config = {}
    for env_path in ('.env', 'LightRAG-main/.env'):
        if os.path.exists(env_path):
            config = load_env_file(env_path)
            if config:
                print(f"Using .env file from: {env_path}")
                break

    if not config:
        print("Warning: No .env file found, using defaults")

    # Ensure critical LLM settings have defaults to prevent accidental OpenAI usage.
    if not _apply_llm_defaults(config):
        print("ERROR: LLM_BINDING_API_KEY or OPENAI_API_KEY must be set in .env")
        sys.exit(1)

    # Get configuration values with defaults.
    raw_port = config.get('PORT', '3015')
    try:
        port = int(raw_port)
    except ValueError:
        print(f"ERROR: PORT must be an integer, got {raw_port!r}")
        return 1
    if not 1 <= port <= 65535:
        print(f"ERROR: PORT must be between 1 and 65535, got {port}")
        return 1

    host = config.get('HOST', '0.0.0.0')
    llm_binding = config.get('LLM_BINDING', 'openai')
    embedding_binding = config.get('EMBEDDING_BINDING', 'ollama')
    rerank_binding = config.get('RERANK_BINDING', 'jina')

    # NOTE(review): a hard-coded server API key is kept as the fallback for
    # backward compatibility; it should be moved into .env and rotated.
    # LIGHTRAG_API_KEY is the override name read by this script - confirm it
    # matches the variable name used by the rest of the deployment.
    api_key = config.get('LIGHTRAG_API_KEY') or 'jleu1212'

    # Check and kill any process using the port.
    print(f"\nChecking port {port}...")
    if is_port_in_use(port):
        print(f"Port {port} is in use. Attempting to kill existing process...")
        if kill_process_on_port(port):
            print(f"Successfully cleared port {port}")
            time.sleep(2)  # Wait for port to be released
        else:
            print(f"Warning: Could not kill process on port {port}")
            print("Trying to start server anyway...")

    _export_settings(config, env)

    cmd = [
        sys.executable, '-m', 'lightrag.api.lightrag_server',
        '--port', str(port),
        '--host', host,
        '--working-dir', 'rag_storage',
        '--input-dir', '../inputs',
        '--key', api_key,
        '--auto-scan-at-startup',
        '--llm-binding', llm_binding,
        '--embedding-binding', embedding_binding,
        '--rerank-binding', rerank_binding,
    ]
    # Never echo the API key to the console or to captured logs.
    shown_cmd = [
        '***' if index > 0 and cmd[index - 1] == '--key' else arg
        for index, arg in enumerate(cmd)
    ]

    print(f"\nServer Configuration:")
    print(f"  Port: {port}")
    print(f"  Host: {host}")
    print(f"  LLM Binding: {llm_binding}")
    print(f"  LLM Host: {config.get('LLM_BINDING_HOST', 'Not set')}")
    print(f"  LLM Model: {config.get('LLM_MODEL', 'Not set')}")
    print(f"  API Key: {'Set' if 'LLM_BINDING_API_KEY' in config else 'Not set'}")
    print(f"\nCommand: {' '.join(shown_cmd)}")
    print(f"Starting server on http://{host}:{port}")

    try:
        # Change to LightRAG-main directory BEFORE starting the server.
        os.chdir('LightRAG-main')
        print(f"Changed to directory: {os.getcwd()}")

        # The context manager closes the output pipe and reaps the child on
        # every exit path, including the early returns below.
        with subprocess.Popen(
            cmd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='utf-8',
            errors='replace',
        ) as process:
            print("\nServer output:")
            print("-" * 50)

            try:
                if _relay_server_output(process, port):
                    process.terminate()
                    process.wait()
                    return 1
            except KeyboardInterrupt:
                print("\nServer stopped by user")
                process.terminate()
                process.wait()
                return 0

            exit_code = process.wait()

        if exit_code != 0:
            print(f"\nServer exited with code {exit_code}")
            # Negative codes mean "killed by signal"; report a plain failure.
            return exit_code if exit_code > 0 else 1

    except Exception as e:
        print(f"Error starting server: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user