Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes

This commit is contained in:
2026-01-11 18:21:16 +08:00
parent 642dd0ea5f
commit 1ddd49f913
97 changed files with 5909 additions and 451 deletions

View File

@@ -173,169 +173,253 @@ def display_splash_screen(args: argparse.Namespace) -> None:
Args:
args: Parsed command line arguments
"""
# Banner
# Banner
top_border = "╔══════════════════════════════════════════════════════════════╗"
bottom_border = "╚══════════════════════════════════════════════════════════════╝"
width = len(top_border) - 4 # width inside the borders
# Windows console has encoding issues with Unicode characters
# We'll use a simplified version that works on all platforms
print(f"\nLightRAG Server v{core_version}/{api_version}")
print("Fast, Lightweight RAG Server Implementation")
print("=" * 60)
# Server Configuration
print("\nServer Configuration:")
print(f" Host: {args.host}")
print(f" Port: {args.port}")
print(f" Workers: {args.workers}")
print(f" Timeout: {args.timeout}")
print(f" CORS Origins: {args.cors_origins}")
print(f" SSL Enabled: {args.ssl}")
if args.ssl:
print(f" SSL Cert: {args.ssl_certfile}")
print(f" SSL Key: {args.ssl_keyfile}")
print(f" Ollama Emulating Model: {ollama_server_infos.LIGHTRAG_MODEL}")
print(f" Log Level: {args.log_level}")
print(f" Verbose Debug: {args.verbose}")
print(f" History Turns: {args.history_turns}")
print(f" API Key: {'Set' if args.key else 'Not Set'}")
print(f" JWT Auth: {'Enabled' if args.auth_accounts else 'Disabled'}")
line1_text = f"LightRAG Server v{core_version}/{api_version}"
line2_text = "Fast, Lightweight RAG Server Implementation"
# Directory Configuration
print("\nDirectory Configuration:")
print(f" Working Directory: {args.working_dir}")
print(f" Input Directory: {args.input_dir}")
line1 = f"{line1_text.center(width)}"
line2 = f"{line2_text.center(width)}"
# LLM Configuration
print("\nLLM Configuration:")
print(f" Binding: {args.llm_binding}")
print(f" Host: {args.llm_binding_host}")
print(f" Model: {args.llm_model}")
print(f" Max Async for LLM: {args.max_async}")
print(f" Summary Context Size: {args.summary_context_size}")
print(f" LLM Cache Enabled: {args.enable_llm_cache}")
print(f" LLM Cache for Extraction Enabled: {args.enable_llm_cache_for_extract}")
banner = f"""
{top_border}
{line1}
{line2}
{bottom_border}
"""
ASCIIColors.cyan(banner)
# Embedding Configuration
print("\nEmbedding Configuration:")
print(f" Binding: {args.embedding_binding}")
print(f" Host: {args.embedding_binding_host}")
print(f" Model: {args.embedding_model}")
print(f" Dimensions: {args.embedding_dim}")
# RAG Configuration
print("\nRAG Configuration:")
print(f" Summary Language: {args.summary_language}")
print(f" Entity Types: {args.entity_types}")
print(f" Max Parallel Insert: {args.max_parallel_insert}")
print(f" Chunk Size: {args.chunk_size}")
print(f" Chunk Overlap Size: {args.chunk_overlap_size}")
print(f" Cosine Threshold: {args.cosine_threshold}")
print(f" Top-K: {args.top_k}")
print(f" Force LLM Summary on Merge: {get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}")
# Storage Configuration
print("\nStorage Configuration:")
print(f" KV Storage: {args.kv_storage}")
print(f" Vector Storage: {args.vector_storage}")
print(f" Graph Storage: {args.graph_storage}")
print(f" Document Status Storage: {args.doc_status_storage}")
print(f" Workspace: {args.workspace if args.workspace else '-'}")
# Server Status
print("\nServer starting up...\n")
# Server Access Information
protocol = "https" if args.ssl else "http"
if args.host == "0.0.0.0":
print("\nServer Access Information:")
print(f" WebUI (local): {protocol}://localhost:{args.port}")
print(f" Remote Access: {protocol}://<your-ip-address>:{args.port}")
print(f" API Documentation (local): {protocol}://localhost:{args.port}/docs")
print(f" Alternative Documentation (local): {protocol}://localhost:{args.port}/redoc")
print("\nNote:")
print(" Since the server is running on 0.0.0.0:")
print(" - Use 'localhost' or '127.0.0.1' for local access")
print(" - Use your machine's IP address for remote access")
print(" - To find your IP address:")
print(" • Windows: Run 'ipconfig' in terminal")
print(" • Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal")
else:
base_url = f"{protocol}://{args.host}:{args.port}"
print("\nServer Access Information:")
print(f" WebUI (local): {base_url}")
print(f" API Documentation: {base_url}/docs")
print(f" Alternative Documentation: {base_url}/redoc")
# Security Notice
if args.key:
print("\nSecurity Notice:")
print(" API Key authentication is enabled.")
print(" Make sure to include the X-API-Key header in all your requests.")
if args.auth_accounts:
print("\nSecurity Notice:")
print(" JWT authentication is enabled.")
print(" Make sure to login before making the request, and include the 'Authorization' in the header.")
# Ensure the splash output is flushed to the system log
sys.stdout.flush()
# Server Configuration
ASCIIColors.magenta("\n📡 Server Configuration:")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.magenta("\n[Server Configuration]:")
ASCIIColors.white(" |- Host: ", end="")
ASCIIColors.yellow(f"{args.host}")
ASCIIColors.white(" ├─ Port: ", end="")
ASCIIColors.white(" |- Port: ", end="")
ASCIIColors.yellow(f"{args.port}")
ASCIIColors.white(" ├─ Workers: ", end="")
ASCIIColors.white(" |- Workers: ", end="")
ASCIIColors.yellow(f"{args.workers}")
ASCIIColors.white(" ├─ Timeout: ", end="")
ASCIIColors.white(" |- Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout}")
ASCIIColors.white(" ├─ CORS Origins: ", end="")
ASCIIColors.white(" |- CORS Origins: ", end="")
ASCIIColors.yellow(f"{args.cors_origins}")
ASCIIColors.white(" ├─ SSL Enabled: ", end="")
ASCIIColors.white(" |- SSL Enabled: ", end="")
ASCIIColors.yellow(f"{args.ssl}")
if args.ssl:
ASCIIColors.white(" ├─ SSL Cert: ", end="")
ASCIIColors.white(" |- SSL Cert: ", end="")
ASCIIColors.yellow(f"{args.ssl_certfile}")
ASCIIColors.white(" ├─ SSL Key: ", end="")
ASCIIColors.white(" |- SSL Key: ", end="")
ASCIIColors.yellow(f"{args.ssl_keyfile}")
ASCIIColors.white(" ├─ Ollama Emulating Model: ", end="")
ASCIIColors.white(" |- Ollama Emulating Model: ", end="")
ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
ASCIIColors.white(" ├─ Log Level: ", end="")
ASCIIColors.white(" |- Log Level: ", end="")
ASCIIColors.yellow(f"{args.log_level}")
ASCIIColors.white(" ├─ Verbose Debug: ", end="")
ASCIIColors.white(" |- Verbose Debug: ", end="")
ASCIIColors.yellow(f"{args.verbose}")
ASCIIColors.white(" ├─ History Turns: ", end="")
ASCIIColors.white(" |- History Turns: ", end="")
ASCIIColors.yellow(f"{args.history_turns}")
ASCIIColors.white(" ├─ API Key: ", end="")
ASCIIColors.white(" |- API Key: ", end="")
ASCIIColors.yellow("Set" if args.key else "Not Set")
ASCIIColors.white(" └─ JWT Auth: ", end="")
ASCIIColors.white(" |- JWT Auth: ", end="")
ASCIIColors.yellow("Enabled" if args.auth_accounts else "Disabled")
# Directory Configuration
ASCIIColors.magenta("\n📂 Directory Configuration:")
ASCIIColors.white(" ├─ Working Directory: ", end="")
ASCIIColors.magenta("\n[Directory Configuration]:")
ASCIIColors.white(" |- Working Directory: ", end="")
ASCIIColors.yellow(f"{args.working_dir}")
ASCIIColors.white(" └─ Input Directory: ", end="")
ASCIIColors.white(" |- Input Directory: ", end="")
ASCIIColors.yellow(f"{args.input_dir}")
# LLM Configuration
ASCIIColors.magenta("\n🤖 LLM Configuration:")
ASCIIColors.white(" ├─ Binding: ", end="")
ASCIIColors.magenta("\n[LLM Configuration]:")
ASCIIColors.white(" |- Binding: ", end="")
ASCIIColors.yellow(f"{args.llm_binding}")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.white(" |- Host: ", end="")
ASCIIColors.yellow(f"{args.llm_binding_host}")
ASCIIColors.white(" ├─ Model: ", end="")
ASCIIColors.white(" |- Model: ", end="")
ASCIIColors.yellow(f"{args.llm_model}")
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
ASCIIColors.white(" |- Max Async for LLM: ", end="")
ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Summary Context Size: ", end="")
ASCIIColors.white(" |- Summary Context Size: ", end="")
ASCIIColors.yellow(f"{args.summary_context_size}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.white(" |- LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.white(" |- LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
# Embedding Configuration
ASCIIColors.magenta("\n📊 Embedding Configuration:")
ASCIIColors.white(" ├─ Binding: ", end="")
ASCIIColors.magenta("\n[Embedding Configuration]:")
ASCIIColors.white(" |- Binding: ", end="")
ASCIIColors.yellow(f"{args.embedding_binding}")
ASCIIColors.white(" ├─ Host: ", end="")
ASCIIColors.white(" |- Host: ", end="")
ASCIIColors.yellow(f"{args.embedding_binding_host}")
ASCIIColors.white(" ├─ Model: ", end="")
ASCIIColors.white(" |- Model: ", end="")
ASCIIColors.yellow(f"{args.embedding_model}")
ASCIIColors.white(" └─ Dimensions: ", end="")
ASCIIColors.white(" |- Dimensions: ", end="")
ASCIIColors.yellow(f"{args.embedding_dim}")
# RAG Configuration
ASCIIColors.magenta("\n⚙️ RAG Configuration:")
ASCIIColors.white(" ├─ Summary Language: ", end="")
ASCIIColors.magenta("\n[RAG Configuration]:")
ASCIIColors.white(" |- Summary Language: ", end="")
ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Entity Types: ", end="")
ASCIIColors.white(" |- Entity Types: ", end="")
ASCIIColors.yellow(f"{args.entity_types}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.white(" |- Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Chunk Size: ", end="")
ASCIIColors.white(" |- Chunk Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
ASCIIColors.white(" |- Chunk Overlap Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
ASCIIColors.white(" |- Cosine Threshold: ", end="")
ASCIIColors.yellow(f"{args.cosine_threshold}")
ASCIIColors.white(" ├─ Top-K: ", end="")
ASCIIColors.white(" |- Top-K: ", end="")
ASCIIColors.yellow(f"{args.top_k}")
ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="")
ASCIIColors.white(" |- Force LLM Summary on Merge: ", end="")
ASCIIColors.yellow(
f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}"
)
# Storage Configuration
ASCIIColors.magenta("\n💾 Storage Configuration:")
ASCIIColors.white(" ├─ KV Storage: ", end="")
ASCIIColors.magenta("\n[Storage Configuration]:")
ASCIIColors.white(" |- KV Storage: ", end="")
ASCIIColors.yellow(f"{args.kv_storage}")
ASCIIColors.white(" ├─ Vector Storage: ", end="")
ASCIIColors.white(" |- Vector Storage: ", end="")
ASCIIColors.yellow(f"{args.vector_storage}")
ASCIIColors.white(" ├─ Graph Storage: ", end="")
ASCIIColors.white(" |- Graph Storage: ", end="")
ASCIIColors.yellow(f"{args.graph_storage}")
ASCIIColors.white(" ├─ Document Status Storage: ", end="")
ASCIIColors.white(" |- Document Status Storage: ", end="")
ASCIIColors.yellow(f"{args.doc_status_storage}")
ASCIIColors.white(" └─ Workspace: ", end="")
ASCIIColors.white(" |- Workspace: ", end="")
ASCIIColors.yellow(f"{args.workspace if args.workspace else '-'}")
# Server Status
ASCIIColors.green("\nServer starting up...\n")
ASCIIColors.green("\n[Server starting up...]\n")
# Server Access Information
protocol = "https" if args.ssl else "http"
if args.host == "0.0.0.0":
ASCIIColors.magenta("\n🌐 Server Access Information:")
ASCIIColors.white(" ├─ WebUI (local): ", end="")
ASCIIColors.magenta("\n[Server Access Information]:")
ASCIIColors.white(" |- WebUI (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}")
ASCIIColors.white(" ├─ Remote Access: ", end="")
ASCIIColors.white(" |- Remote Access: ", end="")
ASCIIColors.yellow(f"{protocol}://<your-ip-address>:{args.port}")
ASCIIColors.white(" ├─ API Documentation (local): ", end="")
ASCIIColors.white(" |- API Documentation (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/docs")
ASCIIColors.white(" └─ Alternative Documentation (local): ", end="")
ASCIIColors.white(" |- Alternative Documentation (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/redoc")
ASCIIColors.magenta("\n📝 Note:")
ASCIIColors.magenta("\n[Note]:")
ASCIIColors.cyan(""" Since the server is running on 0.0.0.0:
- Use 'localhost' or '127.0.0.1' for local access
- Use your machine's IP address for remote access
- To find your IP address:
Windows: Run 'ipconfig' in terminal
Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal
* Windows: Run 'ipconfig' in terminal
* Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal
""")
else:
base_url = f"{protocol}://{args.host}:{args.port}"
ASCIIColors.magenta("\n🌐 Server Access Information:")
ASCIIColors.white(" ├─ WebUI (local): ", end="")
ASCIIColors.magenta("\n[Server Access Information]:")
ASCIIColors.white(" |- WebUI (local): ", end="")
ASCIIColors.yellow(f"{base_url}")
ASCIIColors.white(" ├─ API Documentation: ", end="")
ASCIIColors.white(" |- API Documentation: ", end="")
ASCIIColors.yellow(f"{base_url}/docs")
ASCIIColors.white(" └─ Alternative Documentation: ", end="")
ASCIIColors.white(" |- Alternative Documentation: ", end="")
ASCIIColors.yellow(f"{base_url}/redoc")
# Security Notice
if args.key:
ASCIIColors.yellow("\n⚠️ Security Notice:")
ASCIIColors.yellow("\n[Security Notice]:")
ASCIIColors.white(""" API Key authentication is enabled.
Make sure to include the X-API-Key header in all your requests.
""")
if args.auth_accounts:
ASCIIColors.yellow("\n⚠️ Security Notice:")
ASCIIColors.yellow("\n[Security Notice]:")
ASCIIColors.white(""" JWT authentication is enabled.
Make sure to login before making the request, and include the 'Authorization' in the header.
""")