#!/usr/bin/env python3
"""
Test the fixes for:
1. Upload/indexing performance (should be reasonable)
2. Workspace isolation (search/query only returns documents from selected workspace)
3. Workspace deletion (data removed after deletion)

This is an integration script: it talks to a running server at SERVER_URL and
uploads TEST_FILE, so both must be available before it is started.
"""
import json
import os
import sys
import time
from contextlib import contextmanager
from pathlib import Path

import requests

SERVER_URL = 'http://localhost:3015'
API_BASE = SERVER_URL + '/api/v1'
TEST_FILE = 'test/test.docx'

# Per-request timeout in seconds. Without one, `requests` waits forever on a
# stalled server and the whole test run hangs instead of failing.
REQUEST_TIMEOUT = 60


class ApiError(Exception):
    """Raised when the server answers a request with an unexpected status code.

    Subclasses ``Exception`` so callers that catch ``Exception`` keep working,
    while still letting callers tell an HTTP error reply apart from a
    connection failure or a failed assertion.
    """


def api_request(method, endpoint, workspace='', data=None, files=None,
                timeout=REQUEST_TIMEOUT):
    """Send one HTTP request to the API and return the raw response.

    Args:
        method: HTTP verb, e.g. ``'GET'`` or ``'POST'``.
        endpoint: Path appended to ``API_BASE`` (must start with ``/``).
        workspace: When non-empty, sent as the ``X-Workspace`` header.
        data: Request payload. When truthy and no ``files`` are given it is
            JSON-encoded and sent with a JSON content type.
        files: Multipart file mapping passed straight to ``requests``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response``; status checking is left to the caller.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    url = API_BASE + endpoint
    headers = {}
    if workspace:
        headers['X-Workspace'] = workspace
    if data and not files:
        headers['Content-Type'] = 'application/json'
        data = json.dumps(data)
    return requests.request(
        method, url, headers=headers, data=data, files=files, timeout=timeout
    )


def create_workspace(name):
    """Create workspace ``name``; raise ``ApiError`` unless the server replies 200/201."""
    resp = api_request('POST', '/workspaces', data={'name': name})
    if resp.status_code not in (200, 201):
        raise ApiError(f"Failed to create workspace {name}: {resp.text}")
    print(f"Created workspace {name}")
    return True


def delete_workspace(name):
    """Delete workspace ``name``; raise ``ApiError`` unless the server replies 200."""
    resp = api_request('DELETE', f'/workspaces/{name}')
    if resp.status_code != 200:
        raise ApiError(f"Failed to delete workspace {name}: {resp.text}")
    print(f"Deleted workspace {name}")
    return True


@contextmanager
def temporary_workspace(name):
    """Create workspace ``name`` and delete it on exit, even if the body raises."""
    create_workspace(name)
    try:
        yield name
    finally:
        delete_workspace(name)


def upload_file(file_path, workspace=''):
    """Upload ``file_path`` as a .docx document and return the server's ``track_id``.

    Raises:
        ApiError: If the server does not reply with status 200.
    """
    with open(file_path, 'rb') as f:
        files = {
            'file': (
                os.path.basename(file_path),
                f,
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            )
        }
        # The request must be sent while the file handle is still open.
        resp = api_request('POST', '/upload', workspace=workspace, files=files)
    if resp.status_code != 200:
        raise ApiError(f"Upload failed: {resp.text}")
    result = resp.json()
    track_id = result.get('track_id')
    print(f"Uploaded {file_path}, track_id: {track_id}")
    return track_id


def wait_for_indexing(track_id, workspace='', timeout=120, poll_interval=2):
    """Poll ``/status/<track_id>`` until the reported ``pending`` count is 0.

    Args:
        track_id: Identifier returned by ``upload_file``.
        workspace: Workspace the upload went to.
        timeout: Overall number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        ``True`` once indexing is reported complete.

    Raises:
        TimeoutError: If indexing is not reported complete within ``timeout``.
    """
    # monotonic() is immune to wall-clock adjustments while we wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = api_request('GET', f'/status/{track_id}', workspace=workspace)
        # Non-200 replies are retried until the deadline.
        if resp.status_code == 200:
            # NOTE(review): a reply without a 'pending' key is treated as
            # "done" -- confirm this matches the status endpoint's schema.
            if resp.json().get('pending', 0) == 0:
                print(f"Indexing completed for track {track_id}")
                return True
        time.sleep(poll_interval)
    raise TimeoutError(f"Indexing not completed within {timeout} seconds")


def search(query, workspace=''):
    """Run ``query`` against ``/search`` in ``workspace`` and return the decoded JSON.

    Raises:
        ApiError: If the server does not reply with status 200.
    """
    resp = api_request('POST', '/search', workspace=workspace, data={'query': query})
    if resp.status_code != 200:
        raise ApiError(f"Search failed: {resp.text}")
    return resp.json()


def _count_hits(results):
    """Return ``(chunk_count, entity_count)`` for a search result mapping."""
    return len(results.get('chunks', [])), len(results.get('entities', []))


def test_upload_performance():
    """Upload a document and measure indexing time.

    Returns:
        Elapsed seconds for upload plus indexing.
    """
    workspace = 'perf_test_' + str(int(time.time()))
    with temporary_workspace(workspace):
        start = time.time()
        track_id = upload_file(TEST_FILE, workspace=workspace)
        wait_for_indexing(track_id, workspace=workspace)
        elapsed = time.time() - start
        print(f"Total upload+indexing time: {elapsed:.2f} seconds")
        # Expect under 60 seconds (adjust based on system)
        if elapsed > 60:
            print(f"WARNING: Indexing took {elapsed:.2f}s, might still be slow")
        else:
            print("✓ Upload/indexing performance acceptable")
    return elapsed


def test_workspace_isolation():
    """Upload a document to one workspace and check a second one stays empty.

    Raises:
        AssertionError: If the populated workspace returns nothing, or the
            empty workspace returns anything.
    """
    suffix = str(int(time.time()))
    ws1 = 'isol_1_' + suffix
    ws2 = 'isol_2_' + suffix
    # Both workspaces are removed on exit, whether or not the checks pass.
    with temporary_workspace(ws1), temporary_workspace(ws2):
        # Upload to ws1 only
        track1 = upload_file(TEST_FILE, workspace=ws1)
        wait_for_indexing(track1, workspace=ws1)

        # Search in ws1 should return results
        chunks1, entities1 = _count_hits(search("test", workspace=ws1))
        print(f"Workspace {ws1} search results: {chunks1} chunks, {entities1} entities")
        if chunks1 == 0 and entities1 == 0:
            raise AssertionError(f"Expected results in workspace {ws1}")

        # Search in ws2 should return empty (no documents were uploaded to it)
        chunks2, entities2 = _count_hits(search("test", workspace=ws2))
        print(f"Workspace {ws2} search results: {chunks2} chunks, {entities2} entities")
        if chunks2 > 0 or entities2 > 0:
            print(f"WARNING: Workspace isolation may be broken - found results in {ws2}")
            raise AssertionError("Workspace isolation failed: found results in empty workspace")
        print("✓ Workspace isolation works")


def test_workspace_deletion():
    """Upload a document, delete its workspace, and verify the data is gone.

    Raises:
        AssertionError: If nothing is searchable before deletion, or data is
            still returned after it.
    """
    ws = 'del_test_' + str(int(time.time()))
    create_workspace(ws)
    cleanup_needed = True
    try:
        track = upload_file(TEST_FILE, workspace=ws)
        wait_for_indexing(track, workspace=ws)

        # Verify search works
        chunks, entities = _count_hits(search("test", workspace=ws))
        if chunks == 0 and entities == 0:
            raise AssertionError("No results before deletion")

        # From here on the deletion is the step under test, not cleanup.
        cleanup_needed = False
        delete_workspace(ws)
    finally:
        if cleanup_needed:
            # Best effort: do not let a cleanup failure hide the real error.
            try:
                delete_workspace(ws)
            except Exception as e:
                print(f"WARNING: cleanup of workspace {ws} failed: {e}")

    # Wait a bit for deletion to propagate
    time.sleep(2)

    # Try search again - should return empty (or an API error such as 404).
    # The workspace may still exist in cache but data should be gone.
    try:
        results2 = search("test", workspace=ws)
    except ApiError as e:
        # That's fine, workspace may be gone
        print(f"Search after deletion raised error (expected): {e}")
    else:
        # Checked outside the `try` so a failed check cannot be mistaken for
        # an "expected" search error and silently swallowed.
        chunks2, entities2 = _count_hits(results2)
        if chunks2 != 0 or entities2 != 0:
            print(f"WARNING: Search still returns data after deletion: {results2}")
            raise AssertionError("Data not cleared after workspace deletion")
        print("✓ Workspace deletion cleared data (search returns empty)")
    print("✓ Workspace deletion test passed")


def main():
    """Run all three tests; exit with status 1 if isolation or deletion fails."""
    print("=== Testing Upload/Indexing Performance ===")
    performance_ok = True
    try:
        test_upload_performance()
    except Exception as e:
        # A performance failure is reported but does not abort the run.
        performance_ok = False
        print(f"Performance test failed: {e}")

    print("\n=== Testing Workspace Isolation ===")
    try:
        test_workspace_isolation()
    except Exception as e:
        print(f"Isolation test failed: {e}")
        sys.exit(1)

    print("\n=== Testing Workspace Deletion ===")
    try:
        test_workspace_deletion()
    except Exception as e:
        print(f"Deletion test failed: {e}")
        sys.exit(1)

    if performance_ok:
        print("\n✅ All tests passed!")
    else:
        print("\n⚠ Isolation and deletion tests passed, but the performance test failed (see above)")


if __name__ == "__main__":
    main()