workspace working

This commit is contained in:
2026-01-12 22:31:11 +08:00
parent 2738a822d1
commit 370fe6368a
149 changed files with 4648 additions and 660 deletions

View File

@@ -17,12 +17,13 @@ from fastapi import (
Depends,
File,
HTTPException,
Request,
UploadFile,
)
from pydantic import BaseModel, Field, field_validator
from lightrag import LightRAG
from lightrag.base import DeletionResult, DocProcessingStatus, DocStatus
from lightrag.base import DeletionResult, DocProcessingStatus, DocStatus, StoragesStatus
from lightrag.utils import generate_track_id
from lightrag.api.utils_api import get_combined_auth_dependency
from ..config import global_args
@@ -1612,17 +1613,40 @@ async def background_delete_documents(
def create_document_routes(
workspace_manager, api_key: Optional[str] = None
):
# Get default RAG instance and document manager from workspace manager
rag = workspace_manager.get_rag()
doc_manager = workspace_manager.get_document_manager()
# Create combined auth dependency for document routes
combined_auth = get_combined_auth_dependency(api_key)
# Dependency to get workspace-specific RAG and DocumentManager based on X-Workspace header
async def get_workspace_rag_and_manager(request: Request) -> tuple:
    """Resolve the workspace-scoped RAG instance and document manager.

    FastAPI dependency for the document routes. The workspace name is read
    from the ``X-Workspace`` request header (surrounding whitespace is
    stripped; a missing header yields an empty string) and handed to
    ``workspace_manager`` to look up the matching objects. Storages are
    initialized on demand before the pair is returned.

    Args:
        request: Incoming request whose headers select the workspace.

    Returns:
        A ``(rag, doc_manager)`` tuple for the requested workspace.

    Raises:
        HTTPException: 500 if checking or initializing the storages fails.
    """
    workspace = request.headers.get("X-Workspace", "").strip()
    # NOTE(review): an empty name is forwarded as-is; presumably the
    # workspace manager maps it to the default workspace -- confirm.
    rag = workspace_manager.get_rag(workspace)
    doc_manager = workspace_manager.get_document_manager(workspace)

    try:
        status = rag._storages_status
        logger.debug(f"Workspace '{workspace}': storages status = {status}")

        # Compare against INITIALIZED (not FINALIZED) so that a workspace
        # whose storages are already up is not re-initialized, and does not
        # spam the log, on every single request.
        if status != StoragesStatus.INITIALIZED:
            logger.info(f"Initializing storages for workspace '{workspace}'")
            await rag.initialize_storages()
            logger.info(
                f"Storages initialized, status now = {rag._storages_status}"
            )

        # Diagnostic only: a doc_status storage that exposes a
        # ``_storage_lock`` attribute set to None is reported loudly,
        # but the request is still allowed to proceed.
        doc_status = getattr(rag, "doc_status", None)
        if doc_status is not None and hasattr(doc_status, "_storage_lock"):
            lock = doc_status._storage_lock
            logger.debug(f"doc_status._storage_lock = {lock}, type = {type(lock)}")
            if lock is None:
                logger.error(
                    "doc_status._storage_lock is None! This will cause errors."
                )
    except Exception as e:
        logger.error(
            f"Failed to initialize storages for workspace '{workspace}': {e}"
        )
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(
            status_code=500, detail=f"Storage initialization failed: {e}"
        ) from e

    return rag, doc_manager
@router.post(
"/scan", response_model=ScanResponse, dependencies=[Depends(combined_auth)]
)
async def scan_for_new_documents(background_tasks: BackgroundTasks):
async def scan_for_new_documents(
background_tasks: BackgroundTasks,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager)
):
"""
Trigger the scanning process for new documents.
@@ -1633,6 +1657,7 @@ def create_document_routes(
Returns:
ScanResponse: A response object containing the scanning status and track_id
"""
rag, doc_manager = workspace_rag_and_manager
# Generate track_id with "scan" prefix for scanning operation
track_id = generate_track_id("scan")
@@ -1648,7 +1673,9 @@ def create_document_routes(
"/upload", response_model=InsertResponse, dependencies=[Depends(combined_auth)]
)
async def upload_to_input_dir(
background_tasks: BackgroundTasks, file: UploadFile = File(...)
background_tasks: BackgroundTasks,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
file: UploadFile = File(...)
):
"""
Upload a file to the input directory and index it.
@@ -1659,6 +1686,7 @@ def create_document_routes(
Args:
background_tasks: FastAPI BackgroundTasks for async processing
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
file (UploadFile): The file to be uploaded. It must have an allowed extension.
Returns:
@@ -1668,6 +1696,7 @@ def create_document_routes(
Raises:
HTTPException: If the file type is not supported (400) or other errors occur (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Sanitize filename to prevent Path Traversal attacks
safe_filename = sanitize_filename(file.filename, doc_manager.input_dir)
@@ -1710,7 +1739,8 @@ def create_document_routes(
"/text", response_model=InsertResponse, dependencies=[Depends(combined_auth)]
)
async def insert_text(
request: InsertTextRequest, background_tasks: BackgroundTasks
request: InsertTextRequest, background_tasks: BackgroundTasks,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager)
):
"""
Insert text into the RAG system.
@@ -1721,6 +1751,7 @@ def create_document_routes(
Args:
request (InsertTextRequest): The request body containing the text to be inserted.
background_tasks: FastAPI BackgroundTasks for async processing
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
InsertResponse: A response object containing the status of the operation.
@@ -1728,6 +1759,7 @@ def create_document_routes(
Raises:
HTTPException: If an error occurs during text processing (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Generate track_id for text insertion
track_id = generate_track_id("insert")
@@ -1756,7 +1788,8 @@ def create_document_routes(
dependencies=[Depends(combined_auth)],
)
async def insert_texts(
request: InsertTextsRequest, background_tasks: BackgroundTasks
request: InsertTextsRequest, background_tasks: BackgroundTasks,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager)
):
"""
Insert multiple texts into the RAG system.
@@ -1767,6 +1800,7 @@ def create_document_routes(
Args:
request (InsertTextsRequest): The request body containing the list of texts.
background_tasks: FastAPI BackgroundTasks for async processing
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
InsertResponse: A response object containing the status of the operation.
@@ -1774,6 +1808,7 @@ def create_document_routes(
Raises:
HTTPException: If an error occurs during text processing (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Generate track_id for texts insertion
track_id = generate_track_id("insert")
@@ -1799,7 +1834,9 @@ def create_document_routes(
@router.delete(
"", response_model=ClearDocumentsResponse, dependencies=[Depends(combined_auth)]
)
async def clear_documents():
async def clear_documents(
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager)
):
"""
Clear all documents from the RAG system.
@@ -1807,6 +1844,9 @@ def create_document_routes(
It uses the storage drop methods to properly clean up all data and removes all files
from the input directory.
Args:
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
ClearDocumentsResponse: A response object containing the status and message.
- status="success": All documents and files were successfully cleared.
@@ -1818,13 +1858,15 @@ def create_document_routes(
Raises:
HTTPException: Raised when a serious error occurs during the clearing process,
with status code 500 and error details in the detail field.
with status code 500 and error details in the detail field.
"""
from lightrag.kg.shared_storage import (
get_namespace_data,
get_pipeline_status_lock,
)
rag, doc_manager = workspace_rag_and_manager
# Get pipeline status and lock
pipeline_status = await get_namespace_data("pipeline_status")
pipeline_status_lock = get_pipeline_status_lock()
@@ -2080,21 +2122,27 @@ def create_document_routes(
@router.get(
"", response_model=DocsStatusesResponse, dependencies=[Depends(combined_auth)]
)
async def documents() -> DocsStatusesResponse:
async def documents(
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
) -> DocsStatusesResponse:
"""
Get the status of all documents in the system.
This endpoint retrieves the current status of all documents, grouped by their
processing status (PENDING, PROCESSING, PROCESSED, FAILED).
Args:
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
DocsStatusesResponse: A response object containing a dictionary where keys are
DocStatus values and values are lists of DocStatusResponse
objects representing documents in each status category.
DocStatus values and values are lists of DocStatusResponse
objects representing documents in each status category.
Raises:
HTTPException: If an error occurs while retrieving document statuses (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
statuses = (
DocStatus.PENDING,
@@ -2152,6 +2200,7 @@ def create_document_routes(
async def delete_document(
delete_request: DeleteDocRequest,
background_tasks: BackgroundTasks,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
) -> DeleteDocByIdResponse:
"""
Delete documents and all their associated data by their IDs using background processing.
@@ -2166,6 +2215,7 @@ def create_document_routes(
Args:
delete_request (DeleteDocRequest): The request containing the document IDs and delete_file options.
background_tasks: FastAPI BackgroundTasks for async processing
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
DeleteDocByIdResponse: The result of the deletion operation.
@@ -2177,6 +2227,7 @@ def create_document_routes(
HTTPException:
- 500: If an unexpected internal error occurs during initialization.
"""
rag, doc_manager = workspace_rag_and_manager
doc_ids = delete_request.doc_ids
# The rag object is initialized from the server startup args,
@@ -2227,7 +2278,10 @@ def create_document_routes(
response_model=ClearCacheResponse,
dependencies=[Depends(combined_auth)],
)
async def clear_cache(request: ClearCacheRequest):
async def clear_cache(
request: ClearCacheRequest,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
):
"""
Clear all cache data from the LLM response cache storage.
@@ -2236,6 +2290,7 @@ def create_document_routes(
Args:
request (ClearCacheRequest): The request body (ignored for compatibility).
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
ClearCacheResponse: A response object containing the status and message.
@@ -2243,6 +2298,7 @@ def create_document_routes(
Raises:
HTTPException: If an error occurs during cache clearing (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Call the aclear_cache method (no modes parameter)
await rag.aclear_cache()
@@ -2261,12 +2317,16 @@ def create_document_routes(
response_model=DeletionResult,
dependencies=[Depends(combined_auth)],
)
async def delete_entity(request: DeleteEntityRequest):
async def delete_entity(
request: DeleteEntityRequest,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
):
"""
Delete an entity and all its relationships from the knowledge graph.
Args:
request (DeleteEntityRequest): The request body containing the entity name.
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
DeletionResult: An object containing the outcome of the deletion process.
@@ -2274,6 +2334,7 @@ def create_document_routes(
Raises:
HTTPException: If the entity is not found (404) or an error occurs (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
result = await rag.adelete_by_entity(entity_name=request.entity_name)
if result.status == "not_found":
@@ -2296,12 +2357,16 @@ def create_document_routes(
response_model=DeletionResult,
dependencies=[Depends(combined_auth)],
)
async def delete_relation(request: DeleteRelationRequest):
async def delete_relation(
request: DeleteRelationRequest,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
):
"""
Delete a relationship between two entities from the knowledge graph.
Args:
request (DeleteRelationRequest): The request body containing the source and target entity names.
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
DeletionResult: An object containing the outcome of the deletion process.
@@ -2309,6 +2374,7 @@ def create_document_routes(
Raises:
HTTPException: If the relation is not found (404) or an error occurs (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
result = await rag.adelete_by_relation(
source_entity=request.source_entity,
@@ -2334,7 +2400,10 @@ def create_document_routes(
response_model=TrackStatusResponse,
dependencies=[Depends(combined_auth)],
)
async def get_track_status(track_id: str) -> TrackStatusResponse:
async def get_track_status(
track_id: str,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
) -> TrackStatusResponse:
"""
Get the processing status of documents by tracking ID.
@@ -2343,6 +2412,7 @@ def create_document_routes(
Args:
track_id (str): The tracking ID returned from upload, text, or texts endpoints
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
TrackStatusResponse: A response object containing:
@@ -2353,6 +2423,7 @@ def create_document_routes(
Raises:
HTTPException: If track_id is invalid (400) or an error occurs (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Validate track_id
if not track_id or not track_id.strip():
@@ -2410,6 +2481,7 @@ def create_document_routes(
)
async def get_documents_paginated(
request: DocumentsRequest,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
) -> PaginatedDocsResponse:
"""
Get documents with pagination support.
@@ -2420,6 +2492,7 @@ def create_document_routes(
Args:
request (DocumentsRequest): The request body containing pagination parameters
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
PaginatedDocsResponse: A response object containing:
@@ -2430,6 +2503,7 @@ def create_document_routes(
Raises:
HTTPException: If an error occurs while retrieving documents (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Get paginated documents and status counts in parallel
docs_task = rag.doc_status.get_docs_paginated(
@@ -2495,19 +2569,25 @@ def create_document_routes(
response_model=StatusCountsResponse,
dependencies=[Depends(combined_auth)],
)
async def get_document_status_counts() -> StatusCountsResponse:
async def get_document_status_counts(
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
) -> StatusCountsResponse:
"""
Get counts of documents by status.
This endpoint retrieves the count of documents in each processing status
(PENDING, PROCESSING, PROCESSED, FAILED) for all documents in the system.
Args:
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
StatusCountsResponse: A response object containing status counts
Raises:
HTTPException: If an error occurs while retrieving status counts (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
status_counts = await rag.doc_status.get_all_status_counts()
return StatusCountsResponse(status_counts=status_counts)
@@ -2521,7 +2601,10 @@ def create_document_routes(
"/download/{filename}",
dependencies=[Depends(combined_auth)],
)
async def download_document(filename: str):
async def download_document(
filename: str,
workspace_rag_and_manager: tuple = Depends(get_workspace_rag_and_manager),
):
"""
Download a document file by filename.
@@ -2530,6 +2613,7 @@ def create_document_routes(
Args:
filename (str): The name of the file to download
workspace_rag_and_manager: Tuple containing (rag, doc_manager) for the workspace
Returns:
FileResponse: The file content with appropriate headers for download
@@ -2537,32 +2621,33 @@ def create_document_routes(
Raises:
HTTPException: If file is not found (404) or an error occurs (500).
"""
rag, doc_manager = workspace_rag_and_manager
try:
# Sanitize filename to prevent path traversal
safe_filename = sanitize_filename(filename, doc_manager.input_dir)
# First check in the __enqueued__ directory (where processed files are moved)
enqueued_dir = doc_manager.input_dir / "__enqueued__"
file_path = enqueued_dir / safe_filename
# If not found in __enqueued__, check in the main input directory
if not file_path.exists():
file_path = doc_manager.input_dir / safe_filename
# If still not found, return 404
if not file_path.exists():
raise HTTPException(
status_code=404,
detail=f"File '{safe_filename}' not found in the system"
)
# Check if file is within the allowed directory (security check)
if not file_path.is_relative_to(doc_manager.input_dir):
raise HTTPException(
status_code=400,
detail="Invalid file path"
)
# Determine content type based on file extension
content_type = "application/octet-stream"
if safe_filename.lower().endswith(".pdf"):