"""Manual end-to-end check of workspace isolation against a running server.

The script uploads one document into each of two fresh workspaces, waits for
the indexing pipeline to go idle, searches both workspaces, deletes the first
workspace, and finally checks that the second one still answers searches.

It talks to the server at ``SERVER_URL`` and expects ``test/test.docx`` and
``test/ocr.pdf`` to exist relative to the current working directory.
"""
import json
import sys
import time
from pathlib import Path

import requests

SERVER_URL = "http://localhost:3015"
# NOTE(review): credential is hard-coded in the script — confirm this is a
# throwaway development key before sharing the file.
API_KEY = "jleu1212"

# Seconds to wait on any single HTTP call. Without a timeout ``requests``
# blocks forever if the server stops answering.
REQUEST_TIMEOUT = 60

# Seconds between two polls of the pipeline status endpoint.
POLL_INTERVAL = 5


def get_headers(workspace=None):
    """Return the request headers, optionally scoped to *workspace*.

    Args:
        workspace: Workspace name sent as ``X-Workspace``; omitted when falsy.

    Returns:
        A new dict containing ``X-API-Key`` and, if given, ``X-Workspace``.
    """
    headers = {"X-API-Key": API_KEY}
    if workspace:
        headers["X-Workspace"] = workspace
    return headers


def _request(method, path, workspace=None, **kwargs):
    """Send one HTTP request to the server; return the response or ``None``.

    Transport-level failures (connection refused, timeout, ...) are printed
    and reported as ``None`` so callers can treat them like any other failed
    call instead of crashing with a traceback.
    """
    kwargs.setdefault("timeout", REQUEST_TIMEOUT)
    try:
        return requests.request(
            method,
            f"{SERVER_URL}{path}",
            headers=get_headers(workspace),
            **kwargs,
        )
    except requests.RequestException as exc:
        print(f"Request to {path} failed: {exc}")
        return None


def upload_file(file_path, workspace=None):
    """Upload file to workspace.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the file to upload.
        workspace: Target workspace name, or ``None`` for no workspace header.

    Returns:
        The ``track_id`` value from the server's JSON response, or ``None``
        when the upload failed.
    """
    file_path = Path(file_path)  # accept plain strings as well as Path objects
    with open(file_path, 'rb') as f:
        files = {'file': (file_path.name, f)}
        resp = _request("POST", "/documents/upload", workspace, files=files)
    if resp is None:
        return None
    if resp.status_code != 200:
        print(f"Upload failed: {resp.status_code} {resp.text}")
        return None
    data = resp.json()
    print(f"Uploaded {file_path.name} to workspace {workspace}: track_id {data.get('track_id')}")
    return data.get('track_id')


def wait_for_indexing(timeout=120):
    """Wait until pipeline is not busy.

    Polls ``/documents/pipeline_status`` every ``POLL_INTERVAL`` seconds.

    Args:
        timeout: Maximum number of seconds to keep polling.

    Returns:
        ``True`` as soon as the server reports ``busy`` as false, ``False``
        if *timeout* elapses first.
    """
    # A monotonic clock is immune to wall-clock adjustments while waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = _request("GET", "/documents/pipeline_status")
        if resp is None:
            pass  # transport error already reported; retry on the next poll
        elif resp.status_code == 200:
            data = resp.json()
            if not data.get('busy', False):
                print("Pipeline idle, indexing likely complete")
                return True
            print(f"Pipeline busy: {data.get('job_name')} {data.get('cur_batch')}/{data.get('batchs')}")
        else:
            print(f"Failed to get pipeline status: {resp.status_code}")
        time.sleep(POLL_INTERVAL)
    print("Timeout waiting for indexing")
    return False


def search(query, workspace=None):
    """Run a search for *query*; return the parsed JSON body or ``None``."""
    resp = _request("POST", "/search", workspace, json={"query": query})
    if resp is None:
        return None
    if resp.status_code != 200:
        print(f"Search failed: {resp.status_code} {resp.text}")
        return None
    return resp.json()


def list_workspaces():
    """Return the server's workspace list, or ``[]`` when the call fails."""
    resp = _request("GET", "/workspaces/")
    if resp is None:
        return []
    if resp.status_code == 200:
        return resp.json()
    print(f"Failed to list workspaces: {resp.status_code} {resp.text}")
    return []


def create_workspace(name):
    """Create workspace *name*; return ``True`` on HTTP 200, else ``False``."""
    resp = _request("POST", "/workspaces/", json={"name": name})
    if resp is None:
        return False
    if resp.status_code == 200:
        print(f"Created workspace {name}")
        return True
    print(f"Failed to create workspace: {resp.status_code} {resp.text}")
    return False


def delete_workspace(name):
    """Delete workspace *name*; return ``True`` on HTTP 200, else ``False``."""
    resp = _request("DELETE", f"/workspaces/{name}")
    if resp is None:
        return False
    if resp.status_code == 200:
        print(f"Deleted workspace {name}")
        return True
    print(f"Failed to delete workspace: {resp.status_code} {resp.text}")
    return False


def get_documents(workspace=None):
    """Return the parsed ``/documents`` response, or ``None`` on failure."""
    resp = _request("GET", "/documents", workspace)
    if resp is None:
        return None
    if resp.status_code == 200:
        return resp.json()
    print(f"Failed to get documents: {resp.status_code} {resp.text}")
    return None


def _print_status_counts(docs):
    """Print how many documents are listed under each status in *docs*."""
    if docs:
        for status, doc_list in docs.get('statuses', {}).items():
            print(f"{status}: {len(doc_list)}")


def _print_top_results(results, limit=3):
    """Print the total and the first *limit* hits of a search response."""
    if results:
        print(f"Total results: {results.get('total_results')}")
        for i, r in enumerate(results.get('results', [])[:limit]):
            # A hit without 'content' must not crash the report.
            snippet = (r.get('content') or '')[:80]
            print(f" {i+1}. {r.get('type')}: {snippet}...")


def main():
    """Run the workspace-isolation scenario and print the observations.

    Exits with status 1 when the test files are missing or an upload fails.
    """
    # Ensure test files exist
    test_dir = Path("test")
    if not test_dir.exists():
        print("Test directory not found")
        sys.exit(1)
    file1 = test_dir / "test.docx"
    file2 = test_dir / "ocr.pdf"
    if not file1.exists() or not file2.exists():
        print("Test files missing")
        sys.exit(1)

    # Create fresh workspaces
    ws1 = "isolated_ws1"
    ws2 = "isolated_ws2"

    # Delete if they already exist (cleanup)
    for ws in list_workspaces():
        if ws['name'] in (ws1, ws2):
            delete_workspace(ws['name'])

    # Create workspaces
    create_workspace(ws1)
    create_workspace(ws2)

    # Upload file1 to ws1
    track1 = upload_file(file1, workspace=ws1)
    if not track1:
        print("Failed to upload file1")
        sys.exit(1)

    # Upload file2 to ws2
    track2 = upload_file(file2, workspace=ws2)
    if not track2:
        print("Failed to upload file2")
        sys.exit(1)

    # Wait for indexing
    print("Waiting for indexing...")
    if not wait_for_indexing():
        print("Indexing timed out, but continuing")

    # Give extra time for processing
    time.sleep(10)

    # Check documents in each workspace
    print("\n=== Documents in ws1 ===")
    _print_status_counts(get_documents(workspace=ws1))
    print("\n=== Documents in ws2 ===")
    _print_status_counts(get_documents(workspace=ws2))

    # Search for content in each workspace.
    # test.docx may contain "test", so start with that generic term.
    query = "test"
    print(f"\n=== Search for '{query}' in ws1 ===")
    _print_top_results(search(query, workspace=ws1))
    print(f"\n=== Search for '{query}' in ws2 ===")
    _print_top_results(search(query, workspace=ws2))

    # Isolation cannot be proven with "test" alone because ocr.pdf might
    # contain it too. Also search for "OCR", which should be in ocr.pdf but
    # not in test.docx.
    query2 = "OCR"
    print(f"\n=== Search for '{query2}' in ws1 (should be none) ===")
    results1b = search(query2, workspace=ws1)
    if results1b:
        print(f"Total results: {results1b.get('total_results')}")
    print(f"\n=== Search for '{query2}' in ws2 (should have results) ===")
    results2b = search(query2, workspace=ws2)
    if results2b:
        print(f"Total results: {results2b.get('total_results')}")

    # Now delete workspace ws1
    print(f"\n=== Deleting workspace {ws1} ===")
    delete_workspace(ws1)

    # Wait a bit for cleanup
    time.sleep(5)

    # Try to search in ws1 (should fail or return zero results)
    print(f"\n=== Search in deleted workspace {ws1} (should fail) ===")
    results_deleted = search(query, workspace=ws1)
    if not results_deleted:
        print("Search failed as expected (workspace not found)")
    elif results_deleted.get('total_results') == 0:
        # A successful but empty answer is also an acceptable outcome.
        print("Search returned zero results as expected (workspace deleted)")
    else:
        print(f"Unexpectedly got results: {results_deleted.get('total_results')}")

    # Verify ws2 still works
    print(f"\n=== Search in remaining workspace {ws2} ===")
    results_ws2 = search(query, workspace=ws2)
    if results_ws2:
        print(f"Workspace still functional: {results_ws2.get('total_results')} results")

    print("\n=== Test completed ===")


if __name__ == "__main__":
    main()