79 lines
2.8 KiB
Python
79 lines
2.8 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
|
|
# Base URL of the API server; endpoint paths are appended to it verbatim.
API_BASE: str = "http://localhost:3015"

# Value sent in the X-API-Key header of every request.
# NOTE(review): this credential is hard-coded in source; consider loading it
# from the environment before sharing or committing this script.
API_KEY: str = "jleu1212"

# Path of the PDF used by the (currently disabled) upload step in main().
FILE_PATH: str = "test/ocr.pdf"
|
|
|
|
def api_request(method, endpoint, workspace='', data=None, files=None, timeout=60):
    """Send one HTTP request to the API and return the raw response.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``'GET'``).
        endpoint: Path appended verbatim to ``API_BASE``.
        workspace: When non-empty, sent as the ``X-Workspace`` header.
        data: Request payload. When truthy and no ``files`` are given it is
            serialized to JSON and a JSON ``Content-Type`` header is set;
            otherwise it is passed to ``requests`` unchanged.
        files: Multipart file mapping forwarded to ``requests``.
        timeout: Seconds ``requests`` waits for the server before raising.
            Pass ``None`` to wait indefinitely (the previous behavior).

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; callers inspect ``status_code`` themselves.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests``,
            e.g. on connection failure or when ``timeout`` elapses.
    """
    url = API_BASE + endpoint
    headers = {'X-API-Key': API_KEY}
    if workspace:
        headers['X-Workspace'] = workspace
    if data and not files:
        # For multipart uploads requests must build the Content-Type itself,
        # so the JSON header/body is only used when no files are attached.
        headers['Content-Type'] = 'application/json'
        data = json.dumps(data)
    # Without an explicit timeout requests blocks forever on a stalled
    # server, which would also defeat any polling deadline in callers.
    response = requests.request(
        method,
        url,
        headers=headers,
        data=data,
        files=files,
        timeout=timeout,
    )
    return response
|
|
|
|
def upload_and_wait(file_path, workspace='', timeout=120, poll_interval=2):
    """Upload a PDF and poll its tracking status until indexing finishes.

    Args:
        file_path: Path of the file to upload; it is sent as
            ``application/pdf`` under the multipart field ``file``.
        workspace: Workspace forwarded to ``api_request``.
        timeout: Maximum number of seconds to poll before giving up.
        poll_interval: Seconds to sleep between status polls.

    Returns:
        The ``track_id`` reported by the upload endpoint once every tracked
        document is PROCESSED or FAILED, or ``None`` when the upload is
        rejected or the response carries no ``track_id``.

    Raises:
        TimeoutError: If indexing has not finished within ``timeout`` seconds.
    """
    import os  # local import: the file's import block does not provide os

    print(f'Uploading {file_path}...')
    with open(file_path, 'rb') as f:
        # basename() handles the platform's path separators, unlike split('/').
        files = {'file': (os.path.basename(file_path), f, 'application/pdf')}
        resp = api_request('POST', '/documents/upload', workspace=workspace, files=files)

    if resp.status_code != 200:
        print(f'Upload failed: {resp.text}')
        return None

    track_id = resp.json().get('track_id')
    print(f'Track ID: {track_id}')
    if not track_id:
        # Polling '/documents/track_status/None' could never succeed.
        print('Upload response contained no track_id; cannot wait for indexing.')
        return None

    # Wait for indexing. A monotonic clock keeps the deadline immune to
    # wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = api_request('GET', f'/documents/track_status/{track_id}', workspace=workspace)
        if resp.status_code == 200:
            data = resp.json()
            summary = data.get('status_summary') or {}
            processed = summary.get('PROCESSED', 0)
            failed = summary.get('FAILED', 0)
            pending = summary.get('PENDING', 0)
            # Fall back to the summed per-status counts when total_count is
            # absent or zero (assumes the summary values are integer counts).
            total = data.get('total_count') or sum(summary.values())
            print(f'Status: total={total}, processed={processed}, failed={failed}, pending={pending}')
            # "pending == 0" alone is also true before the tracker has
            # registered anything, or while documents sit in some other
            # in-progress state, so require every document to be terminal.
            if total > 0 and processed + failed >= total:
                if failed:
                    print(f'Warning: {failed} document(s) failed to index.')
                print('Indexing completed.')
                return track_id
        time.sleep(poll_interval)

    raise TimeoutError('Indexing timeout')
|
|
|
|
def search(query, workspace=''):
    """Run ``query`` against the ``/search`` endpoint.

    Prints the query and target workspace, then POSTs ``{'query': query}``
    through ``api_request``.

    Returns:
        The decoded JSON body on HTTP 200; otherwise prints the response
        text and returns ``None``.
    """
    workspace_label = workspace or "default"
    print(f'Searching for "{query}" in workspace {workspace_label}')

    payload = {'query': query}
    response = api_request('POST', '/search', workspace=workspace, data=payload)

    if response.status_code == 200:
        return response.json()

    print(f'Search failed: {response.text}')
    return None
|
|
|
|
def main():
    """Run a sample search against the default workspace and print the outcome.

    Dumps the full JSON result, reports how many chunks and entities it
    contains, and previews the first 200 characters of the first chunk.
    """
    # An empty string selects the default workspace.
    workspace = ''

    # The upload step is disabled (the document may already be uploaded).
    # To re-enable it:
    #     track_id = upload_and_wait(FILE_PATH, workspace)
    #     if not track_id:
    #         return

    found = search('what is the minimum safe working distance', workspace)
    if not found:
        print('No results.')
        return

    print('Search results:')
    print(json.dumps(found, indent=2))

    chunk_list = found.get('chunks', [])
    entity_list = found.get('entities', [])
    print(f'Found {len(chunk_list)} chunks, {len(entity_list)} entities')

    if not chunk_list:
        print('No chunks returned.')
        return

    first_text = chunk_list[0].get('text', '')
    print('First chunk text:', first_text[:200])
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()