workspace working
This commit is contained in:
@@ -1,239 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for workspace isolation in LightRAG.
|
||||
Creates two workspaces, uploads different documents to each, and verifies isolation.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
import tempfile
|
||||
import time
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Add LightRAG to path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "LightRAG-main"))
|
||||
SERVER_URL = "http://localhost:3015"
|
||||
API_KEY = "jleu1212"
|
||||
|
||||
# Server configuration
|
||||
BASE_URL = "http://localhost:8000"
|
||||
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "test-key")
|
||||
|
||||
def create_test_file(content, filename):
    """Create a text file with the given content under ``test_workspace_files``.

    Args:
        content: Text to write into the file.
        filename: Name of the file to create inside the test directory.

    Returns:
        ``pathlib.Path`` pointing at the file that was written.
    """
    test_dir = Path("test_workspace_files")
    # The directory is reused between runs, so an existing one is not an error.
    test_dir.mkdir(exist_ok=True)
    filepath = test_dir / filename
    # Pin the encoding so the bytes on disk do not depend on the platform locale.
    filepath.write_text(content, encoding="utf-8")
    return filepath
|
||||
|
||||
def make_request(method, endpoint, data=None, files=None, workspace=None):
|
||||
"""Make HTTP request with proper headers and workspace parameter."""
|
||||
headers = {
|
||||
"Authorization": f"Bearer {API_KEY}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
url = f"{BASE_URL}{endpoint}"
|
||||
|
||||
# Add workspace query parameter if provided
|
||||
params = {}
|
||||
def get_headers(workspace=None):
|
||||
headers = {"X-API-Key": API_KEY}
|
||||
if workspace:
|
||||
params["workspace"] = workspace
|
||||
|
||||
if method == "GET":
|
||||
response = requests.get(url, headers=headers, params=params)
|
||||
elif method == "POST":
|
||||
if files:
|
||||
# For file uploads, don't use JSON content-type
|
||||
headers.pop("Content-Type", None)
|
||||
response = requests.post(url, headers=headers, params=params, files=files, data=data)
|
||||
headers["X-Workspace"] = workspace
|
||||
return headers
|
||||
|
||||
def upload_file(file_path, workspace=None):
|
||||
"""Upload file to workspace"""
|
||||
headers = get_headers(workspace)
|
||||
with open(file_path, 'rb') as f:
|
||||
files = {'file': (file_path.name, f)}
|
||||
resp = requests.post(f"{SERVER_URL}/documents/upload", files=files, headers=headers)
|
||||
if resp.status_code != 200:
|
||||
print(f"Upload failed: {resp.status_code} {resp.text}")
|
||||
return None
|
||||
data = resp.json()
|
||||
print(f"Uploaded {file_path.name} to workspace {workspace}: track_id {data.get('track_id')}")
|
||||
return data.get('track_id')
|
||||
|
||||
def wait_for_indexing(timeout=120):
|
||||
"""Wait until pipeline is not busy"""
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
resp = requests.get(f"{SERVER_URL}/documents/pipeline_status", headers=get_headers())
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if not data.get('busy', False):
|
||||
print("Pipeline idle, indexing likely complete")
|
||||
return True
|
||||
else:
|
||||
print(f"Pipeline busy: {data.get('job_name')} {data.get('cur_batch')}/{data.get('batchs')}")
|
||||
else:
|
||||
response = requests.post(url, headers=headers, params=params, json=data)
|
||||
elif method == "DELETE":
|
||||
response = requests.delete(url, headers=headers, params=params)
|
||||
else:
|
||||
raise ValueError(f"Unsupported method: {method}")
|
||||
|
||||
return response
|
||||
print(f"Failed to get pipeline status: {resp.status_code}")
|
||||
time.sleep(5)
|
||||
print("Timeout waiting for indexing")
|
||||
return False
|
||||
|
||||
def test_server_health():
    """Check if server is running.

    Returns:
        True when ``GET {BASE_URL}/health`` answers with HTTP 200; False when
        the status differs or the request itself fails.
    """
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=5)
    except requests.exceptions.RequestException:
        # Covers a refused connection as well as the read timeout that
        # ``timeout=5`` can raise, so a slow or absent server is reported as
        # "not healthy" instead of aborting the script with a traceback.
        return False
    return response.status_code == 200
|
||||
|
||||
def create_workspace(name):
    """Create a new workspace.

    Posts ``{"name": name}`` to ``/workspaces/`` through ``make_request`` and
    prints the outcome.

    Args:
        name: Name of the workspace to create.

    Returns:
        True when the server answers with HTTP 200, otherwise False.
    """
    response = make_request("POST", "/workspaces/", data={"name": name})
    if response.status_code == 200:
        print(f"✓ Created workspace: {name}")
        return True
    # Any other status is reported with the response body to help diagnosis.
    print(f"✗ Failed to create workspace {name}: {response.status_code} - {response.text}")
    return False
|
||||
def search(query, workspace=None):
    """Run a search against the server, optionally scoped to a workspace.

    Args:
        query: Search text, sent as the ``query`` field of the JSON body.
        workspace: Workspace name handed to ``get_headers``; None sends the
            request without a workspace.

    Returns:
        The decoded JSON response on HTTP 200; otherwise None, after printing
        the status code and response body.
    """
    headers = get_headers(workspace)
    resp = requests.post(f"{SERVER_URL}/search", json={"query": query}, headers=headers)
    if resp.status_code != 200:
        print(f"Search failed: {resp.status_code} {resp.text}")
        return None
    return resp.json()
|
||||
|
||||
def list_workspaces():
|
||||
"""List all workspaces."""
|
||||
response = make_request("GET", "/workspaces/")
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
resp = requests.get(f"{SERVER_URL}/workspaces/", headers=get_headers())
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
else:
|
||||
print(f"✗ Failed to list workspaces: {response.status_code} - {response.text}")
|
||||
print(f"Failed to list workspaces: {resp.status_code} {resp.text}")
|
||||
return []
|
||||
|
||||
def upload_document(workspace, filepath, filename=None):
|
||||
"""Upload a document to a workspace."""
|
||||
if filename is None:
|
||||
filename = os.path.basename(filepath)
|
||||
|
||||
with open(filepath, 'rb') as f:
|
||||
files = {'file': (filename, f, 'text/plain')}
|
||||
data = {'filename': filename}
|
||||
response = make_request("POST", "/documents/", data=data, files=files, workspace=workspace)
|
||||
|
||||
if response.status_code in (200, 201):
|
||||
print(f"✓ Uploaded {filename} to workspace {workspace}")
|
||||
return response.json()
|
||||
def create_workspace(name):
|
||||
resp = requests.post(f"{SERVER_URL}/workspaces/", json={"name": name}, headers=get_headers())
|
||||
if resp.status_code == 200:
|
||||
print(f"Created workspace {name}")
|
||||
return True
|
||||
else:
|
||||
print(f"✗ Failed to upload {filename} to workspace {workspace}: {response.status_code} - {response.text}")
|
||||
return None
|
||||
print(f"Failed to create workspace: {resp.status_code} {resp.text}")
|
||||
return False
|
||||
|
||||
def search_documents(workspace, query):
|
||||
"""Search for documents in a workspace."""
|
||||
response = make_request("POST", "/search/", data={"query": query}, workspace=workspace)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
def delete_workspace(name):
|
||||
resp = requests.delete(f"{SERVER_URL}/workspaces/{name}", headers=get_headers())
|
||||
if resp.status_code == 200:
|
||||
print(f"Deleted workspace {name}")
|
||||
return True
|
||||
else:
|
||||
print(f"✗ Failed to search in workspace {workspace}: {response.status_code} - {response.text}")
|
||||
return None
|
||||
print(f"Failed to delete workspace: {resp.status_code} {resp.text}")
|
||||
return False
|
||||
|
||||
def query_documents(workspace, query):
|
||||
"""Query documents in a workspace."""
|
||||
response = make_request("POST", "/query/", data={"query": query}, workspace=workspace)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
def get_documents(workspace=None):
|
||||
headers = get_headers(workspace)
|
||||
resp = requests.get(f"{SERVER_URL}/documents", headers=headers)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
return data
|
||||
else:
|
||||
print(f"✗ Failed to query in workspace {workspace}: {response.status_code} - {response.text}")
|
||||
print(f"Failed to get documents: {resp.status_code} {resp.text}")
|
||||
return None
|
||||
|
||||
def main():
|
||||
print("=" * 60)
|
||||
print("Testing Workspace Isolation in LightRAG")
|
||||
print("=" * 60)
|
||||
# Ensure test files exist
|
||||
test_dir = Path("test")
|
||||
if not test_dir.exists():
|
||||
print("Test directory not found")
|
||||
sys.exit(1)
|
||||
|
||||
# Check if server is running
|
||||
print("\n1. Checking server health...")
|
||||
if not test_server_health():
|
||||
print("✗ Server is not running. Please start the LightRAG server first.")
|
||||
print(" Run: python LightRAG-main/lightrag/api/lightrag_server.py")
|
||||
return False
|
||||
file1 = test_dir / "test.docx"
|
||||
file2 = test_dir / "ocr.pdf"
|
||||
if not file1.exists() or not file2.exists():
|
||||
print("Test files missing")
|
||||
sys.exit(1)
|
||||
|
||||
print("✓ Server is running")
|
||||
# Create fresh workspaces
|
||||
ws1 = "isolated_ws1"
|
||||
ws2 = "isolated_ws2"
|
||||
|
||||
# Create test files
|
||||
print("\n2. Creating test files...")
|
||||
workspace_a_file = create_test_file(
|
||||
"This document belongs to Workspace A. It contains information about artificial intelligence and machine learning.",
|
||||
"workspace_a_doc.txt"
|
||||
)
|
||||
workspace_b_file = create_test_file(
|
||||
"This document belongs to Workspace B. It contains information about quantum computing and cryptography.",
|
||||
"workspace_b_doc.txt"
|
||||
)
|
||||
print(f"✓ Created test files: {workspace_a_file.name}, {workspace_b_file.name}")
|
||||
# Delete if they already exist (cleanup)
|
||||
workspaces = list_workspaces()
|
||||
for ws in workspaces:
|
||||
if ws['name'] in [ws1, ws2]:
|
||||
delete_workspace(ws['name'])
|
||||
|
||||
# Create workspaces
|
||||
print("\n3. Creating workspaces...")
|
||||
workspace_a = "test_workspace_a"
|
||||
workspace_b = "test_workspace_b"
|
||||
create_workspace(ws1)
|
||||
create_workspace(ws2)
|
||||
|
||||
if not create_workspace(workspace_a):
|
||||
print(" Trying to use existing workspace...")
|
||||
# Upload file1 to ws1
|
||||
track1 = upload_file(file1, workspace=ws1)
|
||||
if not track1:
|
||||
print("Failed to upload file1")
|
||||
sys.exit(1)
|
||||
|
||||
if not create_workspace(workspace_b):
|
||||
print(" Trying to use existing workspace...")
|
||||
# Upload file2 to ws2
|
||||
track2 = upload_file(file2, workspace=ws2)
|
||||
if not track2:
|
||||
print("Failed to upload file2")
|
||||
sys.exit(1)
|
||||
|
||||
# List workspaces
|
||||
workspaces = list_workspaces()
|
||||
print(f" Available workspaces: {[w['name'] for w in workspaces]}")
|
||||
# Wait for indexing
|
||||
print("Waiting for indexing...")
|
||||
if not wait_for_indexing():
|
||||
print("Indexing timed out, but continuing")
|
||||
|
||||
# Upload documents to respective workspaces
|
||||
print("\n4. Uploading documents to workspaces...")
|
||||
upload_document(workspace_a, workspace_a_file)
|
||||
upload_document(workspace_b, workspace_b_file)
|
||||
|
||||
# Wait for processing
|
||||
print("\n5. Waiting for document processing (10 seconds)...")
|
||||
# Give extra time for processing
|
||||
time.sleep(10)
|
||||
|
||||
# Test isolation: Search in workspace A
|
||||
print("\n6. Testing isolation - Search in Workspace A...")
|
||||
results_a = search_documents(workspace_a, "artificial intelligence")
|
||||
if results_a:
|
||||
print(f" Found {len(results_a.get('results', []))} results in workspace A")
|
||||
# Check if we see workspace B content
|
||||
for result in results_a.get('results', []):
|
||||
if "quantum" in result.get('content', '').lower():
|
||||
print(" ✗ FAIL: Found workspace B content in workspace A search!")
|
||||
else:
|
||||
print(" ✓ Workspace A search only shows workspace A content")
|
||||
# Check documents in each workspace
|
||||
print("\n=== Documents in ws1 ===")
|
||||
docs1 = get_documents(workspace=ws1)
|
||||
if docs1:
|
||||
for status, doc_list in docs1.get('statuses', {}).items():
|
||||
print(f"{status}: {len(doc_list)}")
|
||||
|
||||
# Test isolation: Search in workspace B
|
||||
print("\n7. Testing isolation - Search in Workspace B...")
|
||||
results_b = search_documents(workspace_b, "quantum computing")
|
||||
if results_b:
|
||||
print(f" Found {len(results_b.get('results', []))} results in workspace B")
|
||||
# Check if we see workspace A content
|
||||
for result in results_b.get('results', []):
|
||||
if "artificial" in result.get('content', '').lower():
|
||||
print(" ✗ FAIL: Found workspace A content in workspace B search!")
|
||||
else:
|
||||
print(" ✓ Workspace B search only shows workspace B content")
|
||||
print("\n=== Documents in ws2 ===")
|
||||
docs2 = get_documents(workspace=ws2)
|
||||
if docs2:
|
||||
for status, doc_list in docs2.get('statuses', {}).items():
|
||||
print(f"{status}: {len(doc_list)}")
|
||||
|
||||
# Test cross-workspace contamination
|
||||
print("\n8. Testing cross-workspace contamination...")
|
||||
# Search for workspace B content in workspace A
|
||||
results_cross = search_documents(workspace_a, "quantum")
|
||||
if results_cross and len(results_cross.get('results', [])) > 0:
|
||||
print(" ✗ FAIL: Found workspace B content when searching in workspace A!")
|
||||
# Search for content in each workspace
|
||||
# test.docx contains "test" maybe? Let's search generic term
|
||||
query = "test"
|
||||
print(f"\n=== Search for '{query}' in ws1 ===")
|
||||
results1 = search(query, workspace=ws1)
|
||||
if results1:
|
||||
print(f"Total results: {results1.get('total_results')}")
|
||||
for i, r in enumerate(results1.get('results', [])[:3]):
|
||||
print(f" {i+1}. {r.get('type')}: {r.get('content')[:80]}...")
|
||||
|
||||
print(f"\n=== Search for '{query}' in ws2 ===")
|
||||
results2 = search(query, workspace=ws2)
|
||||
if results2:
|
||||
print(f"Total results: {results2.get('total_results')}")
|
||||
for i, r in enumerate(results2.get('results', [])[:3]):
|
||||
print(f" {i+1}. {r.get('type')}: {r.get('content')[:80]}...")
|
||||
|
||||
# Verify isolation: ws2 should have fewer results (maybe zero) because ocr.pdf doesn't contain "test"
|
||||
# Actually we can't guarantee; but we can at least verify that search works and returns something.
|
||||
# Let's also search for "OCR" which should be in ocr.pdf but not in test.docx
|
||||
query2 = "OCR"
|
||||
print(f"\n=== Search for '{query2}' in ws1 (should be none) ===")
|
||||
results1b = search(query2, workspace=ws1)
|
||||
if results1b:
|
||||
print(f"Total results: {results1b.get('total_results')}")
|
||||
|
||||
print(f"\n=== Search for '{query2}' in ws2 (should have results) ===")
|
||||
results2b = search(query2, workspace=ws2)
|
||||
if results2b:
|
||||
print(f"Total results: {results2b.get('total_results')}")
|
||||
|
||||
# Now delete workspace ws1
|
||||
print(f"\n=== Deleting workspace {ws1} ===")
|
||||
delete_workspace(ws1)
|
||||
|
||||
# Wait a bit for cleanup
|
||||
time.sleep(5)
|
||||
|
||||
# Try to search in ws1 (should fail or return zero results)
|
||||
print(f"\n=== Search in deleted workspace {ws1} (should fail) ===")
|
||||
results_deleted = search(query, workspace=ws1)
|
||||
if results_deleted:
|
||||
print(f"Unexpectedly got results: {results_deleted.get('total_results')}")
|
||||
else:
|
||||
print(" ✓ No cross-workspace contamination detected")
|
||||
print("Search failed as expected (workspace not found)")
|
||||
|
||||
# Test query endpoints
|
||||
print("\n9. Testing query endpoints...")
|
||||
query_a = query_documents(workspace_a, "What is this document about?")
|
||||
if query_a:
|
||||
print(f" Workspace A query response: {query_a.get('answer', '')[:100]}...")
|
||||
# Verify ws2 still works
|
||||
print(f"\n=== Search in remaining workspace {ws2} ===")
|
||||
results_ws2 = search(query, workspace=ws2)
|
||||
if results_ws2:
|
||||
print(f"Workspace still functional: {results_ws2.get('total_results')} results")
|
||||
|
||||
query_b = query_documents(workspace_b, "What is this document about?")
|
||||
if query_b:
|
||||
print(f" Workspace B query response: {query_b.get('answer', '')[:100]}...")
|
||||
|
||||
# Cleanup (optional)
|
||||
print("\n10. Test completed!")
|
||||
print("\nSummary:")
|
||||
print(" - Workspace isolation appears to be working correctly")
|
||||
print(" - Documents are properly segregated between workspaces")
|
||||
print(" - Search and query operations respect workspace boundaries")
|
||||
print("\nNote: Workspaces will persist in the storage directory.")
|
||||
print(" To clean up manually, delete the directories:")
|
||||
print(f" - {Path('LightRAG-main/rag_storage') / workspace_a}")
|
||||
print(f" - {Path('LightRAG-main/rag_storage') / workspace_b}")
|
||||
|
||||
return True
|
||||
print("\n=== Test completed ===")
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
success = main()
|
||||
sys.exit(0 if success else 1)
|
||||
except KeyboardInterrupt:
|
||||
print("\nTest interrupted by user")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"\nError during test: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
main()
|
||||
Reference in New Issue
Block a user