workspace working
This commit is contained in:
79
test_ocr_retrieval.py
Normal file
79
test_ocr_retrieval.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import json
import os
import time

import requests
|
||||
|
||||
API_BASE = 'http://localhost:3015'
|
||||
API_KEY = 'jleu1212'
|
||||
FILE_PATH = 'test/ocr.pdf'
|
||||
|
||||
def api_request(method, endpoint, workspace='', data=None, files=None, timeout=60):
    """Send one authenticated HTTP request to the API and return the response.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``'GET'``).
        endpoint: Path appended verbatim to ``API_BASE``.
        workspace: Value for the ``X-Workspace`` header. The header is omitted
            when this is empty.
        data: Request payload. When no ``files`` are given it is serialised to
            JSON and sent with a JSON content type; alongside ``files`` it is
            passed through to ``requests`` unchanged.
        files: Multipart file mapping forwarded to ``requests``.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The ``requests.Response``. The status code is not checked here; callers
        inspect it themselves.

    Raises:
        requests.exceptions.RequestException: On connection errors or when the
            timeout expires.
    """
    url = API_BASE + endpoint
    headers = {'X-API-Key': API_KEY}
    if workspace:
        headers['X-Workspace'] = workspace
    # Compare against None rather than truthiness so an empty payload such as
    # {} is still sent as a JSON body instead of being silently dropped.
    if data is not None and not files:
        headers['Content-Type'] = 'application/json'
        data = json.dumps(data)
    # Without an explicit timeout requests can block forever on a stalled server.
    return requests.request(
        method,
        url,
        headers=headers,
        data=data,
        files=files,
        timeout=timeout,
    )
|
||||
|
||||
def upload_and_wait(file_path, workspace='', timeout=120, poll_interval=2):
    """Upload a PDF and poll its track status until indexing has settled.

    Args:
        file_path: Path of the PDF to upload.
        workspace: Workspace forwarded to ``api_request``.
        timeout: Maximum number of seconds to keep polling after the upload.
        poll_interval: Seconds to sleep between two status polls.

    Returns:
        The track id reported by the upload endpoint, or ``None`` when the
        upload is rejected or the response carries no track id.

    Raises:
        TimeoutError: If indexing has not settled within ``timeout`` seconds.
    """
    print(f'Uploading {file_path}...')
    with open(file_path, 'rb') as f:
        # basename() honours the platform's path separators, unlike split('/').
        files = {'file': (os.path.basename(file_path), f, 'application/pdf')}
        resp = api_request('POST', '/documents/upload', workspace=workspace, files=files)
    if resp.status_code != 200:
        print(f'Upload failed: {resp.text}')
        return None

    track_id = resp.json().get('track_id')
    print(f'Track ID: {track_id}')
    if not track_id:
        # Polling '/documents/track_status/None' could never succeed.
        print('Upload response did not include a track_id; cannot wait for indexing.')
        return None

    # Wait for indexing. A monotonic clock keeps the deadline immune to
    # wall-clock adjustments while we poll.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = api_request('GET', f'/documents/track_status/{track_id}', workspace=workspace)
        if resp.status_code == 200:
            data = resp.json()
            total = data.get('total_count', 0)
            # Normalise key casing so the lookups below match whichever casing
            # the server uses for status names.
            summary = {
                str(status).upper(): count
                for status, count in (data.get('status_summary') or {}).items()
            }
            processed = summary.get('PROCESSED', 0)
            failed = summary.get('FAILED', 0)
            pending = summary.get('PENDING', 0)
            print(f'Status: total={total}, processed={processed}, failed={failed}, pending={pending}')
            # "Nothing pending" alone is also true before the server has
            # registered any document, so additionally require every tracked
            # document to be accounted for.
            # NOTE(review): assumes total_count covers all documents on this
            # track and that PROCESSED/FAILED are the only terminal states —
            # confirm against the server.
            if total > 0 and pending == 0 and processed + failed >= total:
                print('Indexing completed.')
                return track_id
        time.sleep(poll_interval)
    raise TimeoutError('Indexing timeout')
|
||||
|
||||
def search(query, workspace=''):
    """Run a query against the ``/search`` endpoint.

    Args:
        query: Query text sent as the ``query`` field of the JSON payload.
        workspace: Workspace forwarded to ``api_request``; shown as
            ``default`` in the progress message when empty.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` after printing
        the response text.
    """
    shown_workspace = workspace or "default"
    print(f'Searching for "{query}" in workspace {shown_workspace}')

    payload = {'query': query}
    response = api_request('POST', '/search', workspace=workspace, data=payload)

    if response.status_code == 200:
        return response.json()

    print(f'Search failed: {response.text}')
    return None
|
||||
|
||||
def main():
    """Run one fixed search and print a summary of what came back.

    Prints the full JSON result, the number of chunks and entities it
    contains, and the first 200 characters of the first chunk's text.
    """
    # An empty workspace selects the default one.
    workspace = ''

    # The upload step is skipped on the assumption that the document is already
    # indexed; call upload_and_wait(FILE_PATH, workspace) first (and stop if it
    # returns nothing) when it is not.

    hits = search('what is the minimum safe working distance', workspace)
    if not hits:
        print('No results.')
        return

    print('Search results:')
    print(json.dumps(hits, indent=2))

    chunk_list = hits.get('chunks', [])
    entity_list = hits.get('entities', [])
    print(f'Found {len(chunk_list)} chunks, {len(entity_list)} entities')

    if not chunk_list:
        print('No chunks returned.')
        return

    first_text = chunk_list[0].get('text', '')
    print('First chunk text:', first_text[:200])
|
||||
|
||||
# Run the search only when this file is executed as a script, not on import.
if '__main__' == __name__:
    main()
|
||||
Reference in New Issue
Block a user