"""Manual smoke test: upload ``ocr.pdf`` to a local server and query it.

The script runs these steps in order against ``base_url``:

1. Log in via ``POST /login`` (OAuth2 password-flow form fields) to obtain a
   bearer token.
2. Clear existing documents via ``DELETE /documents`` so the upload is not
   rejected as a duplicate.
3. Upload the PDF via ``POST /documents/upload``.
4. Wait a fixed interval, then list documents via ``GET /documents`` and
   print the entry whose ``file_path`` matches the uploaded file.
5. Run a sample search via ``POST /query``.

The process exits with status 1 when the PDF is missing or when the login,
upload, or search step fails; a failed status listing is reported but is not
fatal.
"""
import json
import os
import sys
import time

import requests

# Test uploading the ocr.pdf file with proper authentication
base_url = 'http://localhost:3015'
pdf_file = 'ocr.pdf'
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment before sharing or committing this script.
username = 'jleu3482'
password = 'jleu1212'

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on an unresponsive server. The read timeout is generous because
# upload and query may take a while on the server side; adjust as needed.
REQUEST_TIMEOUT = (10, 300)

# Check if ocr.pdf exists
if not os.path.exists(pdf_file):
    print(f"❌ {pdf_file} not found in current directory")
    sys.exit(1)

print(f"📄 Found {pdf_file}, size: {os.path.getsize(pdf_file)} bytes")

# Login first (OAuth2 password flow)
print("🔐 Logging in...")
login_data = {
    "username": username,
    "password": password,
    "grant_type": "password",
    "scope": "",
    "client_id": None,
    "client_secret": None,
}
login_response = requests.post(
    f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
)
if login_response.status_code != 200:
    print(f'❌ Login failed: {login_response.text}')
    sys.exit(1)

access_token = login_response.json().get('access_token')
if not access_token:
    # A 200 response without a token would otherwise be sent on as
    # "Bearer None" and fail later with a confusing authorization error.
    print(f'❌ Login failed: no access_token in response: {login_response.text}')
    sys.exit(1)

headers = {'Authorization': f'Bearer {access_token}'}
print('✅ Login successful')

# First, clear any existing documents to avoid a "duplicated" error
print("🗑️ Clearing existing documents...")
clear_response = requests.delete(
    f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
)
if clear_response.status_code == 200:
    print("✅ Documents cleared")
else:
    # Best effort: a failed cleanup is reported, and the upload is still tried.
    print(f"⚠️ Could not clear documents: {clear_response.text}")

# Wait a moment for cleanup
time.sleep(2)

# Upload ocr.pdf
print(f"📤 Uploading {pdf_file}...")
with open(pdf_file, 'rb') as file:
    files = {'file': (pdf_file, file, 'application/pdf')}
    upload_response = requests.post(
        f'{base_url}/documents/upload',
        files=files,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )

print(f" Upload Status: {upload_response.status_code}")
if upload_response.status_code != 200:
    print(
        f'❌ Upload failed with status {upload_response.status_code}: '
        f'{upload_response.text}'
    )
    sys.exit(1)

result = upload_response.json()
print(f" Response: {json.dumps(result, indent=2)}")
if result.get('status') != 'success':
    print(f'❌ Upload failed: {result.get("message", "Unknown error")}')
    sys.exit(1)

print('✅ Upload successful')

# Wait for processing to complete
print("\n⏳ Waiting for document processing...")
time.sleep(10)

# Check document status
print("\n🔍 Checking document status...")
status_response = requests.get(
    f'{base_url}/documents', headers=headers, timeout=REQUEST_TIMEOUT
)
if status_response.status_code == 200:
    documents = status_response.json()
    print(f" Documents: {json.dumps(documents, indent=2)}")

    # Look for our document in the response. The entries are grouped by
    # status; take the first match across all groups so the document is
    # reported exactly once.
    statuses = documents['statuses'] if 'statuses' in documents else {}
    uploaded_doc = next(
        (
            doc
            for doc_list in statuses.values()
            for doc in doc_list
            if doc.get('file_path') == pdf_file
        ),
        None,
    )
    if uploaded_doc is not None:
        print("\n📊 Document Status:")
        print(f" File: {uploaded_doc.get('file_path')}")
        print(f" Status: {uploaded_doc.get('status')}")
        print(f" Error: {uploaded_doc.get('error_msg', 'None')}")
        print(f" Content Length: {uploaded_doc.get('content_length', 'Unknown')}")
    else:
        print(f"⚠️ {pdf_file} not found in document list")
else:
    # Non-fatal: the search below is still attempted.
    print(f"❌ Failed to get documents: {status_response.text}")

# Test search with content from ocr.pdf
print("\n🔍 Testing search functionality...")
query_data = {
    'query': 'safety precautions minimum safe distance high voltage',
    'top_k': 5,
}
search_response = requests.post(
    f'{base_url}/query', json=query_data, headers=headers, timeout=REQUEST_TIMEOUT
)
if search_response.status_code != 200:
    print(f'❌ Search failed: {search_response.text}')
    sys.exit(1)

results = search_response.json()
print("✅ Search successful")
print(f" Response: {results.get('response', 'No response field')}")