"""Clear the server's document cache, re-upload ``ocr.pdf`` and smoke-test search.

The script talks to a document service over HTTP:

1. POST ``/login`` to obtain a bearer token.
2. POST ``/documents/clear_cache``.
3. POST ``/documents/upload`` with the local ``ocr.pdf``.
4. GET ``/documents`` and print the response.
5. POST ``/query`` for a few sample queries and look for ``ocr.pdf`` /
   table content in each answer.

Progress is reported on stdout. The process exits with status 1 when the
PDF is missing or an HTTP request cannot be completed, and 0 otherwise.
"""

import json
import os
import sys
import time

import requests

# Connection settings. The defaults are the values this script has always
# used; the environment variables let them be overridden without editing
# the file.
# NOTE(review): credentials are still committed as defaults -- consider
# removing them once every caller sets the environment variables.
BASE_URL = os.environ.get('OCR_TEST_BASE_URL', 'http://localhost:3015')
USERNAME = os.environ.get('OCR_TEST_USERNAME', 'jleu3482')
PASSWORD = os.environ.get('OCR_TEST_PASSWORD', 'jleu1212')

PDF_FILE = 'ocr.pdf'

# (connect, read) timeout in seconds. Without a timeout ``requests`` waits
# forever on a stalled server. The read timeout is deliberately generous so
# slow uploads and queries are not cut off.
REQUEST_TIMEOUT = (10, 300)

TEST_QUERIES = (
    "table content from ocr.pdf",
    "scanned document",
    "PDF table data",
)


def _login():
    """Log in and return the auth headers, or ``None`` on a non-200 reply."""
    print("🔐 Logging in...")
    login_data = {'username': USERNAME, 'password': PASSWORD}
    response = requests.post(
        f'{BASE_URL}/login', data=login_data, timeout=REQUEST_TIMEOUT
    )
    if response.status_code != 200:
        print('❌ Login failed')
        return None

    token = response.json().get('access_token')
    headers = {'Authorization': f'Bearer {token}'}
    print('✅ Login successful')
    return headers


def _clear_cache(headers):
    """Ask the server to clear its document cache and report the outcome."""
    print("\n🗑️ Clearing failed documents...")
    response = requests.post(
        f'{BASE_URL}/documents/clear_cache',
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code == 200:
        print("✅ Cache cleared successfully")
    else:
        print(f"❌ Cache clear failed: {response.text}")


def _upload_pdf(headers):
    """Upload ``PDF_FILE``; return ``False`` only when the file is missing.

    A rejected upload is reported but still returns ``True`` so the
    remaining checks run.
    """
    if not os.path.exists(PDF_FILE):
        print(f"❌ {PDF_FILE} not found")
        return False

    print(f"\n📤 Uploading {PDF_FILE}...")
    with open(PDF_FILE, 'rb') as file:
        files = {'file': (PDF_FILE, file, 'application/pdf')}
        response = requests.post(
            f'{BASE_URL}/documents/upload',
            files=files,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )

    print(f" Upload Status: {response.status_code}")
    if response.status_code != 200:
        print(f'❌ Upload failed: {response.text}')
        return True

    result = response.json()
    print(f" Response: {json.dumps(result, indent=2)}")
    if result.get('status') == 'success':
        print("✅ Upload successful, waiting for processing...")
        # Wait for processing to complete
        time.sleep(10)
    else:
        print(f"⚠️ Upload status: {result.get('status')}")
    return True


def _check_documents(headers):
    """Fetch ``/documents`` and print the JSON body when the call succeeds."""
    print("\n🔍 Checking document status...")
    response = requests.get(
        f'{BASE_URL}/documents', headers=headers, timeout=REQUEST_TIMEOUT
    )
    if response.status_code == 200:
        documents = response.json()
        print(f" Documents response: {json.dumps(documents, indent=2)}")


def _run_search_queries(headers):
    """Run each sample query and flag answers mentioning ocr.pdf or tables."""
    print("\n🔍 Testing search functionality...")
    for query in TEST_QUERIES:
        print(f"\n🔎 Querying: \"{query}\"")
        query_data = {'query': query, 'top_k': 3}
        response = requests.post(
            f'{BASE_URL}/query',
            json=query_data,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f'❌ Search failed: {response.text}')
            continue

        results = response.json()
        if not isinstance(results, dict):
            print(f" Unexpected result format: {results}")
            continue

        response_text = results.get('response', '')
        # Only the first 200 characters are echoed to keep the log readable.
        print(f" Response: {response_text[:200]}...")
        if 'ocr.pdf' in response_text:
            print(" ✅ Found reference to ocr.pdf!")
        if 'table' in response_text.lower():
            print(" ✅ Found table content!")


def main():
    """Run the clear / upload / verify sequence and return the exit status.

    Returns:
        1 when ``ocr.pdf`` is not present in the working directory,
        0 otherwise (including a rejected login, which is only reported).
    """
    headers = _login()
    if headers is None:
        return 0

    _clear_cache(headers)
    # Wait a moment for cache to clear
    time.sleep(2)

    if not _upload_pdf(headers):
        return 1

    _check_documents(headers)
    _run_search_queries(headers)
    return 0


if __name__ == "__main__":
    try:
        sys.exit(main())
    except requests.RequestException as exc:
        # Connection errors, timeouts and undecodable JSON bodies end up
        # here; report them in one line instead of a traceback.
        print(f"❌ Request failed: {exc}")
        sys.exit(1)