Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes
This commit is contained in:
@@ -51,6 +51,7 @@ from lightrag.api.routers.query_routes import create_query_routes
|
||||
from lightrag.api.routers.graph_routes import create_graph_routes
|
||||
from lightrag.api.routers.search_routes import create_search_routes
|
||||
from lightrag.api.routers.ollama_api import OllamaAPI
|
||||
from lightrag.api.routers.workspace_routes import router as workspace_router
|
||||
|
||||
from lightrag.utils import logger, set_verbose_debug
|
||||
from lightrag.kg.shared_storage import (
|
||||
@@ -196,8 +197,9 @@ def create_app(args):
|
||||
# Check if API key is provided either through env var or args
|
||||
api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
|
||||
|
||||
# Initialize document manager with workspace support for data isolation
|
||||
doc_manager = DocumentManager(args.input_dir, workspace=args.workspace)
|
||||
# Create workspace manager for dynamic workspace management
|
||||
from lightrag.api.workspace_manager import WorkspaceManager
|
||||
workspace_manager = WorkspaceManager(args)
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
@@ -206,13 +208,21 @@ def create_app(args):
|
||||
app.state.background_tasks = set()
|
||||
|
||||
try:
|
||||
# Initialize database connections
|
||||
await rag.initialize_storages()
|
||||
# Initialize default workspace if specified
|
||||
if args.workspace:
|
||||
# Ensure default workspace exists
|
||||
if not workspace_manager.workspace_exists(args.workspace):
|
||||
workspace_manager.create_workspace(args.workspace)
|
||||
|
||||
# Get default workspace RAG instance and initialize it
|
||||
default_rag = workspace_manager.get_rag(args.workspace)
|
||||
await default_rag.initialize_storages()
|
||||
|
||||
# Data migration for default workspace
|
||||
await default_rag.check_and_migrate_data()
|
||||
|
||||
await initialize_pipeline_status()
|
||||
|
||||
# Data migration regardless of storage implementation
|
||||
await rag.check_and_migrate_data()
|
||||
|
||||
pipeline_status = await get_namespace_data("pipeline_status")
|
||||
|
||||
should_start_autoscan = False
|
||||
@@ -224,20 +234,27 @@ def create_app(args):
|
||||
should_start_autoscan = True
|
||||
|
||||
# Only run auto scan when no other process started it first
|
||||
if should_start_autoscan:
|
||||
if should_start_autoscan and args.workspace:
|
||||
# Get document manager for default workspace
|
||||
default_doc_manager = workspace_manager.get_document_manager(args.workspace)
|
||||
default_rag = workspace_manager.get_rag(args.workspace)
|
||||
# Create background task
|
||||
task = asyncio.create_task(run_scanning_process(rag, doc_manager))
|
||||
task = asyncio.create_task(run_scanning_process(default_rag, default_doc_manager))
|
||||
app.state.background_tasks.add(task)
|
||||
task.add_done_callback(app.state.background_tasks.discard)
|
||||
logger.info(f"Process {os.getpid()} auto scan task started at startup.")
|
||||
logger.info(f"Process {os.getpid()} auto scan task started at startup for workspace '{args.workspace}'.")
|
||||
|
||||
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
|
||||
|
||||
yield
|
||||
|
||||
finally:
|
||||
# Clean up database connections
|
||||
await rag.finalize_storages()
|
||||
# Clean up all workspace RAG instances
|
||||
for workspace_name, rag_instance in workspace_manager._rag_instances.items():
|
||||
try:
|
||||
await rag_instance.finalize_storages()
|
||||
except Exception as e:
|
||||
logger.error(f"Error finalizing storages for workspace '{workspace_name}': {e}")
|
||||
|
||||
# Clean up shared data
|
||||
finalize_share_data()
|
||||
@@ -580,62 +597,69 @@ def create_app(args):
|
||||
name=args.simulated_model_name, tag=args.simulated_model_tag
|
||||
)
|
||||
|
||||
# Initialize RAG with unified configuration
|
||||
try:
|
||||
rag = LightRAG(
|
||||
working_dir=args.working_dir,
|
||||
workspace=args.workspace,
|
||||
llm_model_func=create_llm_model_func(args.llm_binding),
|
||||
llm_model_name=args.llm_model,
|
||||
llm_model_max_async=args.max_async,
|
||||
summary_max_tokens=args.summary_max_tokens,
|
||||
summary_context_size=args.summary_context_size,
|
||||
chunk_token_size=int(args.chunk_size),
|
||||
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
||||
llm_model_kwargs=create_llm_model_kwargs(
|
||||
args.llm_binding, args, llm_timeout
|
||||
),
|
||||
embedding_func=embedding_func,
|
||||
default_llm_timeout=llm_timeout,
|
||||
default_embedding_timeout=embedding_timeout,
|
||||
kv_storage=args.kv_storage,
|
||||
graph_storage=args.graph_storage,
|
||||
vector_storage=args.vector_storage,
|
||||
doc_status_storage=args.doc_status_storage,
|
||||
vector_db_storage_cls_kwargs={
|
||||
"cosine_better_than_threshold": args.cosine_threshold
|
||||
},
|
||||
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
|
||||
enable_llm_cache=args.enable_llm_cache,
|
||||
rerank_model_func=rerank_model_func,
|
||||
max_parallel_insert=args.max_parallel_insert,
|
||||
max_graph_nodes=args.max_graph_nodes,
|
||||
addon_params={
|
||||
"language": args.summary_language,
|
||||
"entity_types": args.entity_types,
|
||||
},
|
||||
ollama_server_infos=ollama_server_infos,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize LightRAG: {e}")
|
||||
raise
|
||||
# Create a factory function for creating LightRAG instances with the given configuration
|
||||
def create_lightrag_factory():
    """Return a callable that builds LightRAG instances from the server configuration.

    The returned callable accepts ``working_dir`` and ``workspace`` and constructs
    a new ``LightRAG`` on every call. Every other setting is taken from names in
    the surrounding scope (``args``, ``embedding_func``, ``rerank_model_func``,
    ``llm_timeout``, ``embedding_timeout`` and ``ollama_server_infos``).
    """

    def factory(working_dir: str, workspace: str):
        # The mapping is built in the same order in which the keyword arguments
        # are handed to LightRAG, so the helper calls embedded in it run in the
        # same sequence as a direct keyword call would.
        settings = {
            "working_dir": working_dir,
            "workspace": workspace,
            "llm_model_func": create_llm_model_func(args.llm_binding),
            "llm_model_name": args.llm_model,
            "llm_model_max_async": args.max_async,
            "summary_max_tokens": args.summary_max_tokens,
            "summary_context_size": args.summary_context_size,
            "chunk_token_size": int(args.chunk_size),
            "chunk_overlap_token_size": int(args.chunk_overlap_size),
            "llm_model_kwargs": create_llm_model_kwargs(
                args.llm_binding, args, llm_timeout
            ),
            "embedding_func": embedding_func,
            "default_llm_timeout": llm_timeout,
            "default_embedding_timeout": embedding_timeout,
            "kv_storage": args.kv_storage,
            "graph_storage": args.graph_storage,
            "vector_storage": args.vector_storage,
            "doc_status_storage": args.doc_status_storage,
            "vector_db_storage_cls_kwargs": {
                "cosine_better_than_threshold": args.cosine_threshold
            },
            "enable_llm_cache_for_entity_extract": args.enable_llm_cache_for_extract,
            "enable_llm_cache": args.enable_llm_cache,
            "rerank_model_func": rerank_model_func,
            "max_parallel_insert": args.max_parallel_insert,
            "max_graph_nodes": args.max_graph_nodes,
            "addon_params": {
                "language": args.summary_language,
                "entity_types": args.entity_types,
            },
            "ollama_server_infos": ollama_server_infos,
        }
        return LightRAG(**settings)

    return factory
|
||||
|
||||
# Add routes
|
||||
# Create workspace manager with LightRAG factory
|
||||
workspace_manager = WorkspaceManager(args, lightrag_factory=create_lightrag_factory())
|
||||
app.state.workspace_manager = workspace_manager
|
||||
|
||||
# Add routes with workspace manager
|
||||
app.include_router(
|
||||
create_document_routes(
|
||||
rag,
|
||||
doc_manager,
|
||||
workspace_manager,
|
||||
api_key,
|
||||
)
|
||||
)
|
||||
app.include_router(create_query_routes(rag, api_key, args.top_k))
|
||||
app.include_router(create_graph_routes(rag, api_key))
|
||||
app.include_router(create_search_routes(rag, api_key, args.top_k))
|
||||
app.include_router(create_query_routes(workspace_manager, api_key, args.top_k))
|
||||
app.include_router(create_graph_routes(workspace_manager, api_key))
|
||||
app.include_router(create_search_routes(workspace_manager, api_key, args.top_k))
|
||||
|
||||
# Add Ollama API routes
|
||||
ollama_api = OllamaAPI(rag, top_k=args.top_k, api_key=api_key)
|
||||
# Add Ollama API routes with workspace manager
|
||||
ollama_api = OllamaAPI(workspace_manager, top_k=args.top_k, api_key=api_key)
|
||||
app.include_router(ollama_api.router, prefix="/api")
|
||||
|
||||
# Add workspace routes
|
||||
logger.info("Including workspace router")
|
||||
app.include_router(workspace_router)
|
||||
|
||||
@app.get("/")
|
||||
async def redirect_to_webui():
|
||||
"""Redirect root path to /webui"""
|
||||
|
||||
Reference in New Issue
Block a user