146 lines
6.5 KiB
Python
146 lines
6.5 KiB
Python
"""
|
|
Workspace manager for LightRAG server.
|
|
Provides isolation between different workspaces by managing separate LightRAG instances per workspace.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
from lightrag import LightRAG
|
|
from lightrag.api.routers.document_routes import DocumentManager
|
|
from lightrag.api.config import global_args
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class WorkspaceManager:
|
|
"""Manages multiple workspaces, each with its own LightRAG instance and DocumentManager."""
|
|
|
|
def __init__(self, args, lightrag_factory=None):
|
|
self.args = args
|
|
self.base_working_dir = Path(args.working_dir)
|
|
self.base_input_dir = Path(args.input_dir)
|
|
self.lightrag_factory = lightrag_factory
|
|
# Cache of LightRAG instances per workspace
|
|
self._rag_instances: Dict[str, LightRAG] = {}
|
|
# Cache of DocumentManager instances per workspace
|
|
self._doc_managers: Dict[str, DocumentManager] = {}
|
|
# Ensure base directories exist
|
|
self.base_working_dir.mkdir(parents=True, exist_ok=True)
|
|
self.base_input_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
def list_workspaces(self) -> List[str]:
|
|
"""List all existing workspaces by scanning the working directory."""
|
|
workspaces = []
|
|
for item in self.base_working_dir.iterdir():
|
|
if item.is_dir():
|
|
# Exclude special directories
|
|
if item.name.startswith("__") and item.name.endswith("__"):
|
|
continue
|
|
# Check if it's a valid workspace (has at least one storage file)
|
|
# For simplicity, we consider any subdirectory as a workspace
|
|
workspaces.append(item.name)
|
|
return sorted(workspaces)
|
|
|
|
def create_workspace(self, name: str) -> bool:
|
|
"""Create a new workspace directory."""
|
|
if not name or not name.strip():
|
|
raise ValueError("Workspace name cannot be empty")
|
|
name = name.strip()
|
|
# Validate name (alphanumeric, underscore, hyphen)
|
|
if not all(c.isalnum() or c in ('_', '-') for c in name):
|
|
raise ValueError("Workspace name can only contain alphanumeric characters, underscores, and hyphens")
|
|
workspace_dir = self.base_working_dir / name
|
|
input_subdir = self.base_input_dir / name
|
|
try:
|
|
workspace_dir.mkdir(exist_ok=True)
|
|
input_subdir.mkdir(exist_ok=True)
|
|
logger.info(f"Created workspace '{name}' with directories {workspace_dir}, {input_subdir}")
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Failed to create workspace '{name}': {e}")
|
|
raise
|
|
|
|
async def delete_workspace(self, name: str) -> bool:
|
|
"""Delete a workspace directory and all its data."""
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
# Validate name
|
|
if not name or not name.strip():
|
|
raise ValueError("Workspace name cannot be empty")
|
|
name = name.strip()
|
|
|
|
# Check if workspace exists
|
|
workspace_dir = self.base_working_dir / name
|
|
input_subdir = self.base_input_dir / name
|
|
if not workspace_dir.exists() and not input_subdir.exists():
|
|
raise ValueError(f"Workspace '{name}' does not exist")
|
|
|
|
# Remove cached instances
|
|
if name in self._rag_instances:
|
|
del self._rag_instances[name]
|
|
if name in self._doc_managers:
|
|
del self._doc_managers[name]
|
|
|
|
# Delete workspace data from storage (vector DB, KV, graph, etc.)
|
|
try:
|
|
if self.lightrag_factory:
|
|
# Create a temporary LightRAG instance for this workspace to delete its data
|
|
rag = self.lightrag_factory(str(self.base_working_dir), name)
|
|
# Call async delete_workspace_data
|
|
await rag.adelete_workspace_data()
|
|
logger.info(f"Deleted workspace data for '{name}' from storage")
|
|
else:
|
|
logger.warning("No lightrag_factory provided; workspace data may remain in storage")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to delete workspace data for '{name}': {e}. Proceeding with directory deletion.")
|
|
|
|
# Delete directories recursively
|
|
try:
|
|
if workspace_dir.exists():
|
|
shutil.rmtree(workspace_dir)
|
|
logger.info(f"Deleted workspace directory: {workspace_dir}")
|
|
if input_subdir.exists():
|
|
shutil.rmtree(input_subdir)
|
|
logger.info(f"Deleted workspace input directory: {input_subdir}")
|
|
except Exception as e:
|
|
logger.error(f"Failed to delete workspace '{name}': {e}")
|
|
raise
|
|
|
|
return True
|
|
|
|
def get_rag(self, workspace: str = "") -> LightRAG:
|
|
"""Get or create a LightRAG instance for the given workspace."""
|
|
if not workspace:
|
|
workspace = self.args.workspace # default workspace from args
|
|
if workspace not in self._rag_instances:
|
|
if self.lightrag_factory:
|
|
# The factory is a function, not an object with .create() method
|
|
rag = self.lightrag_factory(str(self.base_working_dir), workspace)
|
|
else:
|
|
# Fallback: create a simple LightRAG instance with default config
|
|
# This is not ideal but works for testing
|
|
from lightrag import LightRAG
|
|
from lightrag.utils import EmbeddingFunc
|
|
# We need to import the same configuration as used in create_app
|
|
# For now, raise error
|
|
raise NotImplementedError("LightRAG factory not provided")
|
|
self._rag_instances[workspace] = rag
|
|
return self._rag_instances[workspace]
|
|
|
|
def get_document_manager(self, workspace: str = "") -> DocumentManager:
|
|
"""Get or create a DocumentManager instance for the given workspace."""
|
|
if not workspace:
|
|
workspace = self.args.workspace
|
|
if workspace not in self._doc_managers:
|
|
# Create a new DocumentManager with workspace-specific input directory
|
|
input_dir = self.base_input_dir / workspace if workspace else self.base_input_dir
|
|
self._doc_managers[workspace] = DocumentManager(str(input_dir), workspace=workspace)
|
|
return self._doc_managers[workspace]
|
|
|
|
def workspace_exists(self, name: str) -> bool:
|
|
"""Check if a workspace exists."""
|
|
return (self.base_working_dir / name).exists() |