Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes
This commit is contained in:
102
LightRAG-main/lightrag/api/workspace_manager.py
Normal file
102
LightRAG-main/lightrag/api/workspace_manager.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Workspace manager for LightRAG server.
|
||||
Provides isolation between different workspaces by managing separate LightRAG instances per workspace.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from lightrag import LightRAG
|
||||
from lightrag.api.routers.document_routes import DocumentManager
|
||||
from lightrag.api.config import global_args
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WorkspaceManager:
    """Manage multiple workspaces, each with its own LightRAG instance and DocumentManager.

    A workspace is a subdirectory of the base working directory, paired with
    a same-named subdirectory of the base input directory.  LightRAG and
    DocumentManager objects are created on first request and then cached per
    workspace name for the lifetime of this manager.
    """

    def __init__(self, args, lightrag_factory=None):
        """Store configuration and make sure both base directories exist.

        Args:
            args: Configuration object; ``working_dir``, ``input_dir`` and
                ``workspace`` (the default workspace name) are read from it.
            lightrag_factory: Optional callable invoked as
                ``lightrag_factory(base_working_dir, workspace)`` that returns
                the LightRAG instance for a workspace.
        """
        self.args = args
        self.base_working_dir = Path(args.working_dir)
        self.base_input_dir = Path(args.input_dir)
        self.lightrag_factory = lightrag_factory
        # Cache of LightRAG instances per workspace
        self._rag_instances: Dict[str, "LightRAG"] = {}
        # Cache of DocumentManager instances per workspace
        self._doc_managers: Dict[str, "DocumentManager"] = {}
        # Ensure base directories exist
        self.base_working_dir.mkdir(parents=True, exist_ok=True)
        self.base_input_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _is_single_path_component(name: str) -> bool:
        """Return True if *name* is one plain path segment (no separators, not '.' or '..')."""
        return bool(name) and name not in (".", "..") and Path(name).name == name

    def list_workspaces(self) -> List[str]:
        """Return the sorted names of all workspaces under the base working directory.

        Every subdirectory counts as a workspace, except special directories
        whose name both starts and ends with a double underscore
        (for example ``__pycache__``).  Plain files are ignored.
        """
        return sorted(
            entry.name
            for entry in self.base_working_dir.iterdir()
            if entry.is_dir()
            and not (entry.name.startswith("__") and entry.name.endswith("__"))
        )

    def create_workspace(self, name: str) -> bool:
        """Create the working and input directories for a workspace.

        Surrounding whitespace is stripped from *name* before validation.
        Creating a workspace that already exists is not an error.

        Args:
            name: Workspace name; only alphanumeric characters, underscores
                and hyphens are accepted.

        Returns:
            True once both directories exist.

        Raises:
            ValueError: If *name* is empty/blank or contains other characters.
            OSError: If a directory cannot be created (logged, then re-raised).
        """
        if not name or not name.strip():
            raise ValueError("Workspace name cannot be empty")
        name = name.strip()
        # Validate name (alphanumeric, underscore, hyphen)
        # NOTE(review): names such as "__x__" pass this check but are hidden
        # by list_workspaces(); decide whether they should be rejected here.
        if not all(c.isalnum() or c in ("_", "-") for c in name):
            raise ValueError("Workspace name can only contain alphanumeric characters, underscores, and hyphens")
        workspace_dir = self.base_working_dir / name
        input_subdir = self.base_input_dir / name
        try:
            # parents=True so creation still succeeds if a base directory was
            # removed after this manager was constructed.
            workspace_dir.mkdir(parents=True, exist_ok=True)
            input_subdir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            logger.error("Failed to create workspace '%s': %s", name, e)
            raise
        logger.info(
            "Created workspace '%s' with directories %s, %s",
            name,
            workspace_dir,
            input_subdir,
        )
        return True

    def delete_workspace(self, name: str) -> bool:
        """Delete a workspace directory and all its data.

        Raises:
            NotImplementedError: Always; deletion is not implemented yet.
        """
        # TODO: implement deletion with caution (maybe require confirmation)
        raise NotImplementedError("Workspace deletion not yet implemented")

    def get_rag(self, workspace: str = "") -> "LightRAG":
        """Return the cached LightRAG instance for *workspace*, creating it on first use.

        Args:
            workspace: Workspace name; an empty value selects the default
                workspace from ``args.workspace``.

        Raises:
            NotImplementedError: If the instance is not cached yet and no
                ``lightrag_factory`` was supplied to the constructor.
        """
        if not workspace:
            workspace = self.args.workspace  # default workspace from args
        if workspace not in self._rag_instances:
            if not self.lightrag_factory:
                # There is no built-in fallback configuration: instances can
                # only be produced by the injected factory.
                raise NotImplementedError("LightRAG factory not provided")
            # The factory is a plain callable, not an object with .create()
            self._rag_instances[workspace] = self.lightrag_factory(
                str(self.base_working_dir), workspace
            )
        return self._rag_instances[workspace]

    def get_document_manager(self, workspace: str = "") -> "DocumentManager":
        """Return the cached DocumentManager for *workspace*, creating it on first use.

        Args:
            workspace: Workspace name; an empty value selects the default
                workspace from ``args.workspace``.  If the resolved name is
                still empty, the base input directory itself is used.
        """
        if not workspace:
            workspace = self.args.workspace
        if workspace not in self._doc_managers:
            # Create a new DocumentManager with workspace-specific input directory
            input_dir = self.base_input_dir / workspace if workspace else self.base_input_dir
            self._doc_managers[workspace] = DocumentManager(str(input_dir), workspace=workspace)
        return self._doc_managers[workspace]

    def workspace_exists(self, name: str) -> bool:
        """Return True if *name* is an existing workspace directory.

        Only a single path component is accepted, so an empty name, ``.``,
        ``..`` or anything containing a path separator reports False instead
        of probing paths outside (or equal to) the base working directory.
        Plain files do not count as workspaces.
        """
        if not self._is_single_path_component(name):
            return False
        return (self.base_working_dir / name).is_dir()
|
||||
Reference in New Issue
Block a user