Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes

2026-01-11 18:21:16 +08:00
parent 642dd0ea5f
commit 1ddd49f913
97 changed files with 5909 additions and 451 deletions
--- a/LightRAG-main/lightrag/api/utils_api.py
+++ b/LightRAG-main/lightrag/api/utils_api.py
@@ -173,169 +173,253 @@ def display_splash_screen(args: argparse.Namespace) -> None:
    Args:
        args: Parsed command line arguments
    """
-    # Banner
-    # Banner
-    top_border = "╔══════════════════════════════════════════════════════════════╗"
-    bottom_border = "╚══════════════════════════════════════════════════════════════╝"
-    width = len(top_border) - 4  # width inside the borders
+    # The Windows console has encoding issues with some Unicode characters,
+    # so we use a simplified version that works on all platforms.
+    print(f"\nLightRAG Server v{core_version}/{api_version}")
+    print("Fast, Lightweight RAG Server Implementation")
+    print("=" * 60)
+    
+    # Server Configuration
+    print("\nServer Configuration:")
+    print(f"  Host: {args.host}")
+    print(f"  Port: {args.port}")
+    print(f"  Workers: {args.workers}")
+    print(f"  Timeout: {args.timeout}")
+    print(f"  CORS Origins: {args.cors_origins}")
+    print(f"  SSL Enabled: {args.ssl}")
+    if args.ssl:
+        print(f"  SSL Cert: {args.ssl_certfile}")
+        print(f"  SSL Key: {args.ssl_keyfile}")
+    print(f"  Ollama Emulating Model: {ollama_server_infos.LIGHTRAG_MODEL}")
+    print(f"  Log Level: {args.log_level}")
+    print(f"  Verbose Debug: {args.verbose}")
+    print(f"  History Turns: {args.history_turns}")
+    print(f"  API Key: {'Set' if args.key else 'Not Set'}")
+    print(f"  JWT Auth: {'Enabled' if args.auth_accounts else 'Disabled'}")

-    line1_text = f"LightRAG Server v{core_version}/{api_version}"
-    line2_text = "Fast, Lightweight RAG Server Implementation"
+    # Directory Configuration
+    print("\nDirectory Configuration:")
+    print(f"  Working Directory: {args.working_dir}")
+    print(f"  Input Directory: {args.input_dir}")

-    line1 = f"║ {line1_text.center(width)} ║"
-    line2 = f"║ {line2_text.center(width)} ║"
+    # LLM Configuration
+    print("\nLLM Configuration:")
+    print(f"  Binding: {args.llm_binding}")
+    print(f"  Host: {args.llm_binding_host}")
+    print(f"  Model: {args.llm_model}")
+    print(f"  Max Async for LLM: {args.max_async}")
+    print(f"  Summary Context Size: {args.summary_context_size}")
+    print(f"  LLM Cache Enabled: {args.enable_llm_cache}")
+    print(f"  LLM Cache for Extraction Enabled: {args.enable_llm_cache_for_extract}")

-    banner = f"""
-    {top_border}
-    {line1}
-    {line2}
-    {bottom_border}
-    """
-    ASCIIColors.cyan(banner)
+    # Embedding Configuration
+    print("\nEmbedding Configuration:")
+    print(f"  Binding: {args.embedding_binding}")
+    print(f"  Host: {args.embedding_binding_host}")
+    print(f"  Model: {args.embedding_model}")
+    print(f"  Dimensions: {args.embedding_dim}")
+
+    # RAG Configuration
+    print("\nRAG Configuration:")
+    print(f"  Summary Language: {args.summary_language}")
+    print(f"  Entity Types: {args.entity_types}")
+    print(f"  Max Parallel Insert: {args.max_parallel_insert}")
+    print(f"  Chunk Size: {args.chunk_size}")
+    print(f"  Chunk Overlap Size: {args.chunk_overlap_size}")
+    print(f"  Cosine Threshold: {args.cosine_threshold}")
+    print(f"  Top-K: {args.top_k}")
+    print(f"  Force LLM Summary on Merge: {get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}")
+
+    # Storage Configuration
+    print("\nStorage Configuration:")
+    print(f"  KV Storage: {args.kv_storage}")
+    print(f"  Vector Storage: {args.vector_storage}")
+    print(f"  Graph Storage: {args.graph_storage}")
+    print(f"  Document Status Storage: {args.doc_status_storage}")
+    print(f"  Workspace: {args.workspace if args.workspace else '-'}")
+
+    # Server Status
+    print("\nServer starting up...\n")
+
+    # Server Access Information
+    protocol = "https" if args.ssl else "http"
+    if args.host == "0.0.0.0":
+        print("\nServer Access Information:")
+        print(f"  WebUI (local): {protocol}://localhost:{args.port}")
+        print(f"  Remote Access: {protocol}://<your-ip-address>:{args.port}")
+        print(f"  API Documentation (local): {protocol}://localhost:{args.port}/docs")
+        print(f"  Alternative Documentation (local): {protocol}://localhost:{args.port}/redoc")
+        
+        print("\nNote:")
+        print("  Since the server is running on 0.0.0.0:")
+        print("  - Use 'localhost' or '127.0.0.1' for local access")
+        print("  - Use your machine's IP address for remote access")
+        print("  - To find your IP address:")
+        print("    • Windows: Run 'ipconfig' in terminal")
+        print("    • Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal")
+    else:
+        base_url = f"{protocol}://{args.host}:{args.port}"
+        print("\nServer Access Information:")
+        print(f"  WebUI (local): {base_url}")
+        print(f"  API Documentation: {base_url}/docs")
+        print(f"  Alternative Documentation: {base_url}/redoc")
+
+    # Security Notice
+    if args.key:
+        print("\nSecurity Notice:")
+        print("  API Key authentication is enabled.")
+        print("  Make sure to include the X-API-Key header in all your requests.")
+    if args.auth_accounts:
+        print("\nSecurity Notice:")
+        print("  JWT authentication is enabled.")
+        print("  Make sure to login before making the request, and include the 'Authorization' in the header.")
+
+    # Ensure the splash output is flushed to the system log
+    sys.stdout.flush()

    # Server Configuration
-    ASCIIColors.magenta("\n📡 Server Configuration:")
-    ASCIIColors.white("    ├─ Host: ", end="")
+    ASCIIColors.magenta("\n[Server Configuration]:")
+    ASCIIColors.white("    |- Host: ", end="")
    ASCIIColors.yellow(f"{args.host}")
-    ASCIIColors.white("    ├─ Port: ", end="")
+    ASCIIColors.white("    |- Port: ", end="")
    ASCIIColors.yellow(f"{args.port}")
-    ASCIIColors.white("    ├─ Workers: ", end="")
+    ASCIIColors.white("    |- Workers: ", end="")
    ASCIIColors.yellow(f"{args.workers}")
-    ASCIIColors.white("    ├─ Timeout: ", end="")
+    ASCIIColors.white("    |- Timeout: ", end="")
    ASCIIColors.yellow(f"{args.timeout}")
-    ASCIIColors.white("    ├─ CORS Origins: ", end="")
+    ASCIIColors.white("    |- CORS Origins: ", end="")
    ASCIIColors.yellow(f"{args.cors_origins}")
-    ASCIIColors.white("    ├─ SSL Enabled: ", end="")
+    ASCIIColors.white("    |- SSL Enabled: ", end="")
    ASCIIColors.yellow(f"{args.ssl}")
    if args.ssl:
-        ASCIIColors.white("    ├─ SSL Cert: ", end="")
+        ASCIIColors.white("    |- SSL Cert: ", end="")
        ASCIIColors.yellow(f"{args.ssl_certfile}")
-        ASCIIColors.white("    ├─ SSL Key: ", end="")
+        ASCIIColors.white("    |- SSL Key: ", end="")
        ASCIIColors.yellow(f"{args.ssl_keyfile}")
-    ASCIIColors.white("    ├─ Ollama Emulating Model: ", end="")
+    ASCIIColors.white("    |- Ollama Emulating Model: ", end="")
    ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
-    ASCIIColors.white("    ├─ Log Level: ", end="")
+    ASCIIColors.white("    |- Log Level: ", end="")
    ASCIIColors.yellow(f"{args.log_level}")
-    ASCIIColors.white("    ├─ Verbose Debug: ", end="")
+    ASCIIColors.white("    |- Verbose Debug: ", end="")
    ASCIIColors.yellow(f"{args.verbose}")
-    ASCIIColors.white("    ├─ History Turns: ", end="")
+    ASCIIColors.white("    |- History Turns: ", end="")
    ASCIIColors.yellow(f"{args.history_turns}")
-    ASCIIColors.white("    ├─ API Key: ", end="")
+    ASCIIColors.white("    |- API Key: ", end="")
    ASCIIColors.yellow("Set" if args.key else "Not Set")
-    ASCIIColors.white("    └─ JWT Auth: ", end="")
+    ASCIIColors.white("    |- JWT Auth: ", end="")
    ASCIIColors.yellow("Enabled" if args.auth_accounts else "Disabled")

    # Directory Configuration
-    ASCIIColors.magenta("\n📂 Directory Configuration:")
-    ASCIIColors.white("    ├─ Working Directory: ", end="")
+    ASCIIColors.magenta("\n[Directory Configuration]:")
+    ASCIIColors.white("    |- Working Directory: ", end="")
    ASCIIColors.yellow(f"{args.working_dir}")
-    ASCIIColors.white("    └─ Input Directory: ", end="")
+    ASCIIColors.white("    |- Input Directory: ", end="")
    ASCIIColors.yellow(f"{args.input_dir}")

    # LLM Configuration
-    ASCIIColors.magenta("\n🤖 LLM Configuration:")
-    ASCIIColors.white("    ├─ Binding: ", end="")
+    ASCIIColors.magenta("\n[LLM Configuration]:")
+    ASCIIColors.white("    |- Binding: ", end="")
    ASCIIColors.yellow(f"{args.llm_binding}")
-    ASCIIColors.white("    ├─ Host: ", end="")
+    ASCIIColors.white("    |- Host: ", end="")
    ASCIIColors.yellow(f"{args.llm_binding_host}")
-    ASCIIColors.white("    ├─ Model: ", end="")
+    ASCIIColors.white("    |- Model: ", end="")
    ASCIIColors.yellow(f"{args.llm_model}")
-    ASCIIColors.white("    ├─ Max Async for LLM: ", end="")
+    ASCIIColors.white("    |- Max Async for LLM: ", end="")
    ASCIIColors.yellow(f"{args.max_async}")
-    ASCIIColors.white("    ├─ Summary Context Size: ", end="")
+    ASCIIColors.white("    |- Summary Context Size: ", end="")
    ASCIIColors.yellow(f"{args.summary_context_size}")
-    ASCIIColors.white("    ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.white("    |- LLM Cache Enabled: ", end="")
    ASCIIColors.yellow(f"{args.enable_llm_cache}")
-    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.white("    |- LLM Cache for Extraction Enabled: ", end="")
    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")

    # Embedding Configuration
-    ASCIIColors.magenta("\n📊 Embedding Configuration:")
-    ASCIIColors.white("    ├─ Binding: ", end="")
+    ASCIIColors.magenta("\n[Embedding Configuration]:")
+    ASCIIColors.white("    |- Binding: ", end="")
    ASCIIColors.yellow(f"{args.embedding_binding}")
-    ASCIIColors.white("    ├─ Host: ", end="")
+    ASCIIColors.white("    |- Host: ", end="")
    ASCIIColors.yellow(f"{args.embedding_binding_host}")
-    ASCIIColors.white("    ├─ Model: ", end="")
+    ASCIIColors.white("    |- Model: ", end="")
    ASCIIColors.yellow(f"{args.embedding_model}")
-    ASCIIColors.white("    └─ Dimensions: ", end="")
+    ASCIIColors.white("    |- Dimensions: ", end="")
    ASCIIColors.yellow(f"{args.embedding_dim}")

    # RAG Configuration
-    ASCIIColors.magenta("\n⚙️ RAG Configuration:")
-    ASCIIColors.white("    ├─ Summary Language: ", end="")
+    ASCIIColors.magenta("\n[RAG Configuration]:")
+    ASCIIColors.white("    |- Summary Language: ", end="")
    ASCIIColors.yellow(f"{args.summary_language}")
-    ASCIIColors.white("    ├─ Entity Types: ", end="")
+    ASCIIColors.white("    |- Entity Types: ", end="")
    ASCIIColors.yellow(f"{args.entity_types}")
-    ASCIIColors.white("    ├─ Max Parallel Insert: ", end="")
+    ASCIIColors.white("    |- Max Parallel Insert: ", end="")
    ASCIIColors.yellow(f"{args.max_parallel_insert}")
-    ASCIIColors.white("    ├─ Chunk Size: ", end="")
+    ASCIIColors.white("    |- Chunk Size: ", end="")
    ASCIIColors.yellow(f"{args.chunk_size}")
-    ASCIIColors.white("    ├─ Chunk Overlap Size: ", end="")
+    ASCIIColors.white("    |- Chunk Overlap Size: ", end="")
    ASCIIColors.yellow(f"{args.chunk_overlap_size}")
-    ASCIIColors.white("    ├─ Cosine Threshold: ", end="")
+    ASCIIColors.white("    |- Cosine Threshold: ", end="")
    ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white("    ├─ Top-K: ", end="")
+    ASCIIColors.white("    |- Top-K: ", end="")
    ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white("    └─ Force LLM Summary on Merge: ", end="")
+    ASCIIColors.white("    |- Force LLM Summary on Merge: ", end="")
    ASCIIColors.yellow(
        f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}"
    )

    # System Configuration
-    ASCIIColors.magenta("\n💾 Storage Configuration:")
-    ASCIIColors.white("    ├─ KV Storage: ", end="")
+    ASCIIColors.magenta("\n[Storage Configuration]:")
+    ASCIIColors.white("    |- KV Storage: ", end="")
    ASCIIColors.yellow(f"{args.kv_storage}")
-    ASCIIColors.white("    ├─ Vector Storage: ", end="")
+    ASCIIColors.white("    |- Vector Storage: ", end="")
    ASCIIColors.yellow(f"{args.vector_storage}")
-    ASCIIColors.white("    ├─ Graph Storage: ", end="")
+    ASCIIColors.white("    |- Graph Storage: ", end="")
    ASCIIColors.yellow(f"{args.graph_storage}")
-    ASCIIColors.white("    ├─ Document Status Storage: ", end="")
+    ASCIIColors.white("    |- Document Status Storage: ", end="")
    ASCIIColors.yellow(f"{args.doc_status_storage}")
-    ASCIIColors.white("    └─ Workspace: ", end="")
+    ASCIIColors.white("    |- Workspace: ", end="")
    ASCIIColors.yellow(f"{args.workspace if args.workspace else '-'}")

    # Server Status
-    ASCIIColors.green("\n✨ Server starting up...\n")
+    ASCIIColors.green("\n[Server starting up...]\n")

    # Server Access Information
    protocol = "https" if args.ssl else "http"
    if args.host == "0.0.0.0":
-        ASCIIColors.magenta("\n🌐 Server Access Information:")
-        ASCIIColors.white("    ├─ WebUI (local): ", end="")
+        ASCIIColors.magenta("\n[Server Access Information]:")
+        ASCIIColors.white("    |- WebUI (local): ", end="")
        ASCIIColors.yellow(f"{protocol}://localhost:{args.port}")
-        ASCIIColors.white("    ├─ Remote Access: ", end="")
+        ASCIIColors.white("    |- Remote Access: ", end="")
        ASCIIColors.yellow(f"{protocol}://<your-ip-address>:{args.port}")
-        ASCIIColors.white("    ├─ API Documentation (local): ", end="")
+        ASCIIColors.white("    |- API Documentation (local): ", end="")
        ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/docs")
-        ASCIIColors.white("    └─ Alternative Documentation (local): ", end="")
+        ASCIIColors.white("    |- Alternative Documentation (local): ", end="")
        ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/redoc")

-        ASCIIColors.magenta("\n📝 Note:")
+        ASCIIColors.magenta("\n[Note]:")
        ASCIIColors.cyan("""    Since the server is running on 0.0.0.0:
    - Use 'localhost' or '127.0.0.1' for local access
    - Use your machine's IP address for remote access
    - To find your IP address:
-      • Windows: Run 'ipconfig' in terminal
-      • Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal
+      * Windows: Run 'ipconfig' in terminal
+      * Linux/Mac: Run 'ifconfig' or 'ip addr' in terminal
    """)
    else:
        base_url = f"{protocol}://{args.host}:{args.port}"
-        ASCIIColors.magenta("\n🌐 Server Access Information:")
-        ASCIIColors.white("    ├─ WebUI (local): ", end="")
+        ASCIIColors.magenta("\n[Server Access Information]:")
+        ASCIIColors.white("    |- WebUI (local): ", end="")
        ASCIIColors.yellow(f"{base_url}")
-        ASCIIColors.white("    ├─ API Documentation: ", end="")
+        ASCIIColors.white("    |- API Documentation: ", end="")
        ASCIIColors.yellow(f"{base_url}/docs")
-        ASCIIColors.white("    └─ Alternative Documentation: ", end="")
+        ASCIIColors.white("    |- Alternative Documentation: ", end="")
        ASCIIColors.yellow(f"{base_url}/redoc")

    # Security Notice
    if args.key:
-        ASCIIColors.yellow("\n⚠️  Security Notice:")
+        ASCIIColors.yellow("\n[Security Notice]:")
        ASCIIColors.white("""    API Key authentication is enabled.
    Make sure to include the X-API-Key header in all your requests.
    """)
    if args.auth_accounts:
-        ASCIIColors.yellow("\n⚠️  Security Notice:")
+        ASCIIColors.yellow("\n[Security Notice]:")
        ASCIIColors.white("""    JWT authentication is enabled.
    Make sure to login before making the request, and include the 'Authorization' in the header.
    """)