"""Smoke-test script for a local document API.

Optionally uploads a PDF and waits for it to be indexed, then sends a search
query and prints the chunks/entities found in the JSON response.
"""

import json
import os
import time

import requests

API_BASE = 'http://localhost:3015'
# NOTE(review): the API key is hard-coded in source; consider loading it from
# the environment before sharing or committing this script.
API_KEY = 'jleu1212'
FILE_PATH = 'test/ocr.pdf'
# Seconds to wait for any single HTTP call before giving up.
REQUEST_TIMEOUT = 60


def api_request(method, endpoint, workspace='', data=None, files=None,
                timeout=REQUEST_TIMEOUT):
    """Send one HTTP request to the API and return the raw response.

    Args:
        method: HTTP verb, e.g. ``'GET'`` or ``'POST'``.
        endpoint: Path appended verbatim to ``API_BASE``.
        workspace: When non-empty, sent as the ``X-Workspace`` header.
        data: Request payload. Serialized to JSON (with a matching
            ``Content-Type`` header) unless ``files`` is also given, in which
            case it is passed through to ``requests`` unchanged.
        files: Multipart file mapping forwarded to ``requests``.
        timeout: Seconds before ``requests`` aborts the call.

    Returns:
        The ``requests.Response`` object; the status code is not checked here.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    url = API_BASE + endpoint
    headers = {'X-API-Key': API_KEY}
    if workspace:
        headers['X-Workspace'] = workspace
    # ``is not None`` so that an empty-but-present payload ({}) is still
    # sent as JSON instead of being silently dropped.
    if data is not None and not files:
        headers['Content-Type'] = 'application/json'
        data = json.dumps(data)
    # Without a timeout, requests would block forever on an unresponsive server.
    return requests.request(method, url, headers=headers, data=data,
                            files=files, timeout=timeout)


def upload_and_wait(file_path, workspace='', timeout=120, poll_interval=2):
    """Upload a PDF and poll its tracking status until indexing has finished.

    Args:
        file_path: Path of the file to upload; sent as ``application/pdf``.
        workspace: Optional workspace forwarded to ``api_request``.
        timeout: Maximum number of seconds to wait for indexing.
        poll_interval: Seconds to sleep between status polls.

    Returns:
        The ``track_id`` from the upload response once every tracked document
        is processed or failed, or ``None`` when the upload is rejected or the
        response carries no ``track_id``.

    Raises:
        TimeoutError: If indexing does not finish within ``timeout`` seconds.
    """
    print(f'Uploading {file_path}...')
    with open(file_path, 'rb') as f:
        files = {'file': (os.path.basename(file_path), f, 'application/pdf')}
        resp = api_request('POST', '/documents/upload', workspace=workspace,
                           files=files)
    if resp.status_code != 200:
        print(f'Upload failed: {resp.text}')
        return None

    track_id = resp.json().get('track_id')
    print(f'Track ID: {track_id}')
    if not track_id:
        # Polling ".../track_status/None" could never succeed.
        print('Upload response did not include a track_id; cannot track indexing.')
        return None

    # Monotonic clock: the deadline is immune to system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = api_request('GET', f'/documents/track_status/{track_id}',
                           workspace=workspace)
        if resp.status_code == 200:
            data = resp.json()
            total = data.get('total_count', 0)
            summary = data.get('status_summary') or {}
            processed = summary.get('PROCESSED', 0)
            failed = summary.get('FAILED', 0)
            pending = summary.get('PENDING', 0)
            print(f'Status: total={total}, processed={processed}, '
                  f'failed={failed}, pending={pending}')
            # Finished only when every tracked document reached a terminal
            # state. "pending == 0" alone is also true before anything is
            # registered (total == 0) or while documents are in an
            # intermediate state.
            # NOTE(review): assumes PROCESSED/FAILED are the only terminal
            # states reported by the server - confirm against the API.
            if total > 0 and processed + failed >= total:
                if failed:
                    print(f'Indexing finished with {failed} failed document(s).')
                else:
                    print('Indexing completed.')
                return track_id
        time.sleep(poll_interval)
    raise TimeoutError('Indexing timeout')


def search(query, workspace=''):
    """Run a search query and return the decoded JSON response.

    Args:
        query: Query text, sent as ``{'query': query}``.
        workspace: Optional workspace forwarded to ``api_request``.

    Returns:
        The parsed JSON body on HTTP 200, otherwise ``None``.
    """
    print(f'Searching for "{query}" in workspace {workspace if workspace else "default"}')
    resp = api_request('POST', '/search', workspace=workspace,
                       data={'query': query})
    if resp.status_code != 200:
        print(f'Search failed: {resp.text}')
        return None
    return resp.json()


def main():
    """Search the default workspace and print a summary of the results."""
    # Use default workspace (empty)
    workspace = ''

    # Upload if needed (maybe already uploaded)
    # track_id = upload_and_wait(FILE_PATH, workspace)
    # if not track_id:
    #     return

    # Search
    results = search('what is the minimum safe working distance', workspace)
    if not results:
        print('No results.')
        return

    print('Search results:')
    print(json.dumps(results, indent=2))
    chunks = results.get('chunks', [])
    entities = results.get('entities', [])
    print(f'Found {len(chunks)} chunks, {len(entities)} entities')
    if chunks:
        print('First chunk text:', chunks[0].get('text', '')[:200])
    else:
        print('No chunks returned.')


if __name__ == '__main__':
    main()