239 lines
8.9 KiB
Python
239 lines
8.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
Test script for workspace isolation in LightRAG.

Creates two workspaces, uploads different documents to each, and verifies isolation.
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
import requests
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Make the "LightRAG-main" directory that sits next to this script importable.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "LightRAG-main"))

# Server configuration. Both values can be overridden from the environment so
# the script can target a server other than the local default.
# A trailing slash is stripped because every endpoint below is written with a
# leading "/" and is appended directly to BASE_URL.
BASE_URL = os.environ.get("LIGHTRAG_BASE_URL", "http://localhost:8000").rstrip("/")
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "test-key")
|
|
|
|
def create_test_file(content, filename):
    """Write ``content`` to ``test_workspace_files/<filename>`` and return its path.

    The directory is created relative to the current working directory if it
    does not exist yet. An existing file with the same name is overwritten.

    Args:
        content: Text to store in the file.
        filename: Name of the file inside ``test_workspace_files``.

    Returns:
        A ``pathlib.Path`` pointing at the written file.
    """
    test_dir = Path("test_workspace_files")
    test_dir.mkdir(exist_ok=True)
    filepath = test_dir / filename
    # Pin the encoding: without it the bytes on disk depend on the platform
    # locale, and non-ASCII text can fail to encode or upload differently.
    filepath.write_text(content, encoding="utf-8")
    return filepath
|
|
|
|
def make_request(method, endpoint, data=None, files=None, workspace=None, timeout=120):
    """Send an authenticated HTTP request to the server and return the response.

    Args:
        method: HTTP verb; one of "GET", "POST" or "DELETE" (case-sensitive).
        endpoint: Path appended to ``BASE_URL``, e.g. "/workspaces/".
        data: Payload for POST requests. Sent as a JSON body, or as form
            fields when ``files`` is given. Ignored for GET and DELETE.
        files: Optional multipart file mapping handed to ``requests.post``.
        workspace: Optional workspace name, sent as the ``workspace`` query
            parameter when truthy.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled server would block the script forever.

    Returns:
        The ``requests.Response`` object. The status code is not checked here.

    Raises:
        ValueError: If ``method`` is not one of the supported verbs.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # Reject bad input before doing any other work.
    if method not in ("GET", "POST", "DELETE"):
        raise ValueError(f"Unsupported method: {method}")

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    url = f"{BASE_URL}{endpoint}"

    # Add workspace query parameter if provided
    params = {"workspace": workspace} if workspace else {}

    if method == "GET":
        return requests.get(url, headers=headers, params=params, timeout=timeout)

    if method == "DELETE":
        return requests.delete(url, headers=headers, params=params, timeout=timeout)

    # POST
    if files:
        # For file uploads, drop the JSON content-type so requests can set the
        # multipart content-type (including the boundary) itself.
        headers.pop("Content-Type", None)
        return requests.post(
            url, headers=headers, params=params, files=files, data=data, timeout=timeout
        )
    return requests.post(url, headers=headers, params=params, json=data, timeout=timeout)
|
|
|
|
def test_server_health():
    """Check whether the server answers ``GET /health`` with HTTP 200.

    Returns:
        True if the health endpoint responds with status 200 within 5 seconds;
        False on any other status or if the request fails.
    """
    # NOTE: despite the ``test_`` prefix this is a helper used by main(); the
    # name is kept unchanged so existing callers continue to work.
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=5)
    except requests.exceptions.RequestException:
        # RequestException is the base class of ConnectionError and Timeout.
        # Catching only ConnectionError let a read timeout escape as a
        # traceback instead of reporting "server not running".
        return False
    return response.status_code == 200
|
|
|
|
def create_workspace(name):
    """Ask the server to create a workspace and print the outcome.

    Args:
        name: Name of the workspace, sent as ``{"name": name}`` to
            ``POST /workspaces/``.

    Returns:
        True if the server answered 200 or 201, False otherwise.
    """
    response = make_request("POST", "/workspaces/", data={"name": name})
    # Accept 201 Created as well as 200 OK, consistent with upload_document;
    # otherwise a successful creation answered with 201 is reported as a failure.
    if response.status_code in (200, 201):
        print(f"✓ Created workspace: {name}")
        return True

    print(f"✗ Failed to create workspace {name}: {response.status_code} - {response.text}")
    return False
|
|
|
|
def list_workspaces():
    """Fetch the workspaces known to the server.

    Returns:
        The decoded JSON body of ``GET /workspaces/`` when the server answers
        200; otherwise the failure is printed and an empty list is returned.
    """
    reply = make_request("GET", "/workspaces/")
    if reply.status_code != 200:
        print(f"✗ Failed to list workspaces: {reply.status_code} - {reply.text}")
        return []
    return reply.json()
|
|
|
|
def upload_document(workspace, filepath, filename=None):
    """Upload one file to ``POST /documents/`` within the given workspace.

    Args:
        workspace: Workspace name passed through to make_request.
        filepath: Path of the file to upload; opened in binary mode.
        filename: Name to report to the server. Defaults to the base name of
            ``filepath``.

    Returns:
        The decoded JSON response on status 200 or 201, otherwise None (after
        printing the failure).
    """
    upload_name = os.path.basename(filepath) if filename is None else filename

    # The request is sent while the file is still open so requests can stream it.
    with open(filepath, 'rb') as handle:
        reply = make_request(
            "POST",
            "/documents/",
            data={'filename': upload_name},
            files={'file': (upload_name, handle, 'text/plain')},
            workspace=workspace,
        )

    if reply.status_code not in (200, 201):
        print(f"✗ Failed to upload {upload_name} to workspace {workspace}: {reply.status_code} - {reply.text}")
        return None

    print(f"✓ Uploaded {upload_name} to workspace {workspace}")
    return reply.json()
|
|
|
|
def search_documents(workspace, query):
    """Run a search via ``POST /search/`` scoped to ``workspace``.

    Returns:
        The decoded JSON response on status 200, otherwise None (after
        printing the failure).
    """
    payload = {"query": query}
    reply = make_request("POST", "/search/", data=payload, workspace=workspace)
    if reply.status_code != 200:
        print(f"✗ Failed to search in workspace {workspace}: {reply.status_code} - {reply.text}")
        return None
    return reply.json()
|
|
|
|
def query_documents(workspace, query):
    """Send a question to ``POST /query/`` scoped to ``workspace``.

    Returns:
        The decoded JSON response on status 200, otherwise None (after
        printing the failure).
    """
    payload = {"query": query}
    reply = make_request("POST", "/query/", data=payload, workspace=workspace)
    if reply.status_code != 200:
        print(f"✗ Failed to query in workspace {workspace}: {reply.status_code} - {reply.text}")
        return None
    return reply.json()
|
|
|
|
def _leaks_term(results, forbidden_term):
    """Return True if any search hit's 'content' text mentions ``forbidden_term``."""
    return any(
        forbidden_term in str(hit.get('content') or '').lower()
        for hit in results.get('results', [])
    )


def _check_search_isolation(workspace, label, other_label, query, forbidden_term):
    """Search one workspace and report whether the other workspace's content shows up.

    Returns True only when the search request succeeded and no hit mentions
    ``forbidden_term``; a failed request counts as a failed check because
    isolation could not be observed.
    """
    results = search_documents(workspace, query)
    if results is None:
        print(f" ✗ FAIL: Search in workspace {label} failed; isolation could not be checked")
        return False

    print(f" Found {len(results.get('results', []))} results in workspace {label}")
    if _leaks_term(results, forbidden_term):
        print(f" ✗ FAIL: Found workspace {other_label} content in workspace {label} search!")
        return False

    print(f" ✓ Workspace {label} search only shows workspace {label} content")
    return True


def _report_query(workspace, label):
    """Query one workspace and print the start of the answer; return False if the request failed."""
    reply = query_documents(workspace, "What is this document about?")
    if reply is None:
        return False
    print(f" Workspace {label} query response: {str(reply.get('answer') or '')[:100]}...")
    return True


def main():
    """Run the workspace-isolation scenario against a running server.

    Steps: health check, create two test files, create two workspaces, upload
    one file to each, wait for processing, then verify through search and
    query that neither workspace exposes the other's content.

    Returns:
        True only if every check passed. False if the server is unreachable,
        an upload failed, or any isolation/query check failed, so the process
        exit code reflects the real outcome.
    """
    print("=" * 60)
    print("Testing Workspace Isolation in LightRAG")
    print("=" * 60)

    # Descriptions of every check that did not pass; drives summary and result.
    failures = []

    # Check if server is running
    print("\n1. Checking server health...")
    if not test_server_health():
        print("✗ Server is not running. Please start the LightRAG server first.")
        print(" Run: python LightRAG-main/lightrag/api/lightrag_server.py")
        return False
    print("✓ Server is running")

    # Create test files
    print("\n2. Creating test files...")
    workspace_a_file = create_test_file(
        "This document belongs to Workspace A. It contains information about artificial intelligence and machine learning.",
        "workspace_a_doc.txt",
    )
    workspace_b_file = create_test_file(
        "This document belongs to Workspace B. It contains information about quantum computing and cryptography.",
        "workspace_b_doc.txt",
    )
    print(f"✓ Created test files: {workspace_a_file.name}, {workspace_b_file.name}")

    # Create workspaces
    print("\n3. Creating workspaces...")
    workspace_a = "test_workspace_a"
    workspace_b = "test_workspace_b"
    for workspace in (workspace_a, workspace_b):
        if not create_workspace(workspace):
            # Not fatal: as the message says, the run falls back to a
            # workspace that presumably already exists from an earlier run.
            print(" Trying to use existing workspace...")

    # List workspaces (informational only).
    workspaces = list_workspaces()
    # NOTE(review): the response schema is not visible here; entries are
    # assumed to be dicts with a 'name' key. Anything else is printed as-is
    # rather than crashing the run on a purely informational line.
    names = [w.get('name') if isinstance(w, dict) else w for w in workspaces]
    print(f" Available workspaces: {names}")

    # Upload documents to respective workspaces
    print("\n4. Uploading documents to workspaces...")
    uploads = [
        upload_document(workspace_a, workspace_a_file),
        upload_document(workspace_b, workspace_b_file),
    ]
    if any(result is None for result in uploads):
        # Without both documents every later check would pass vacuously.
        print(" ✗ FAIL: Document upload failed; isolation cannot be verified")
        return False

    # Wait for processing
    print("\n5. Waiting for document processing (10 seconds)...")
    time.sleep(10)

    # Test isolation: Search in workspace A
    print("\n6. Testing isolation - Search in Workspace A...")
    if not _check_search_isolation(workspace_a, "A", "B", "artificial intelligence", "quantum"):
        failures.append("search isolation in workspace A")

    # Test isolation: Search in workspace B
    print("\n7. Testing isolation - Search in Workspace B...")
    if not _check_search_isolation(workspace_b, "B", "A", "quantum computing", "artificial"):
        failures.append("search isolation in workspace B")

    # Test cross-workspace contamination: search for workspace B content in A.
    print("\n8. Testing cross-workspace contamination...")
    results_cross = search_documents(workspace_a, "quantum")
    if results_cross is None:
        # A failed request proves nothing; do not report it as "no contamination".
        print(" ✗ FAIL: Cross-workspace search failed; contamination could not be checked")
        failures.append("cross-workspace search request")
    elif results_cross.get('results'):
        # NOTE(review): any hit counts as contamination, as in the original
        # check. If the server returns nearest matches regardless of relevance
        # this may flag workspace A's own document - confirm against the API.
        print(" ✗ FAIL: Found workspace B content when searching in workspace A!")
        failures.append("cross-workspace contamination")
    else:
        print(" ✓ No cross-workspace contamination detected")

    # Test query endpoints
    print("\n9. Testing query endpoints...")
    for workspace, label in ((workspace_a, "A"), (workspace_b, "B")):
        if not _report_query(workspace, label):
            failures.append(f"query in workspace {label}")

    # Summary reflects the recorded outcomes instead of always claiming success.
    print("\n10. Test completed!")
    print("\nSummary:")
    if failures:
        print(f" - {len(failures)} check(s) FAILED: {', '.join(failures)}")
        print(" - Workspace isolation is NOT verified")
    else:
        print(" - Workspace isolation appears to be working correctly")
        print(" - Documents are properly segregated between workspaces")
        print(" - Search and query operations respect workspace boundaries")

    print("\nNote: Workspaces will persist in the storage directory.")
    print(" To clean up manually, delete the directories:")
    print(f" - {Path('LightRAG-main/rag_storage') / workspace_a}")
    print(f" - {Path('LightRAG-main/rag_storage') / workspace_b}")

    return not failures
|
|
|
|
if __name__ == "__main__":
    # Script entry point: turn the outcome of main() into a process exit code
    # (0 when main() returns a truthy value, 1 otherwise or on any error).
    try:
        exit_code = 0 if main() else 1
    except KeyboardInterrupt:
        print("\nTest interrupted by user")
        exit_code = 1
    except Exception as err:
        import traceback

        print(f"\nError during test: {err}")
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)