"""Smoke test for the OCR PDF upload, indexing and search workflow.

The script logs in at ``/login``, uploads a PDF to ``/documents/upload``,
sleeps for a fixed period to give the server time to process the file, and
then posts a handful of queries to ``/search``, printing every step.
"""

import json
import os
import time

# Server configuration
base_url = 'http://localhost:3015'
pdf_file = 'LightRAG-main/test_documents/ocr.pdf'

# Seconds allowed for login/search calls. Without a timeout, requests waits
# forever on an unresponsive server.
REQUEST_TIMEOUT = 30
# The upload carries the whole PDF, so it gets a more generous limit.
UPLOAD_TIMEOUT = 120
# Fixed pause between a successful upload and the first search.
INDEXING_WAIT_SECONDS = 30
# Queries issued against the search endpoint after indexing.
SEARCH_QUERIES = ('document', 'test', 'sample', 'content', 'pdf')

# NOTE(review): the prefixes of the status messages below look like
# mis-decoded emoji. They are left untouched because the intended glyphs
# cannot be recovered with certainty -- confirm against the original file.


def format_result(index, result):
    """Return the one-line summary printed for a single search hit.

    Args:
        index: Zero-based position of the hit; displayed one-based.
        result: Mapping for one hit; the ``content`` and ``score`` keys are
            read and both are optional.

    Returns:
        The formatted line, with the content truncated to 150 characters.
    """
    # ``or ''`` also covers an explicit null, which would break the slice.
    content = result.get('content', '') or ''
    score = result.get('score', 0)
    # A null or non-numeric score must not abort the whole run with a
    # formatting TypeError, so only real numbers get the ``.3f`` treatment.
    if isinstance(score, (int, float)):
        score_text = f'{score:.3f}'
    else:
        score_text = str(score)
    return f' Result {index + 1} (score: {score_text}): {content[:150]}...'


def main():
    """Run the login -> upload -> wait -> search workflow, printing progress.

    Every failure (missing file, non-200 response, raised exception) is
    reported on stdout; nothing is raised to the caller.
    """
    # Imported here rather than at module level so that ``format_result``
    # stays importable without the HTTP client installed.
    import requests

    print('šŸš€ Testing OCR PDF upload, indexing and search workflow...')
    print(f'šŸ“ Using PDF: {pdf_file}')

    if not os.path.exists(pdf_file):
        print('āŒ Test file not found')
        return
    print('āœ… Test file found')

    # Login first to get token
    print('šŸ” Logging in...')
    login_data = {
        'username': 'jleu3482',
        'password': 'jleu1212'
    }

    # Top-level boundary of a diagnostic script: report any failure
    # (connection error, timeout, malformed JSON, ...) instead of a traceback.
    try:
        # Login
        login_response = requests.post(
            f'{base_url}/login', data=login_data, timeout=REQUEST_TIMEOUT
        )
        if login_response.status_code != 200:
            print(f'āŒ Login failed: {login_response.status_code} - {login_response.text}')
            return

        token = login_response.json().get('access_token')
        headers = {'Authorization': f'Bearer {token}'}
        print('āœ… Login successful')

        # Upload the PDF
        print('šŸ“¤ Uploading PDF...')
        with open(pdf_file, 'rb') as pdf:
            files = {'file': ('sample_ocr.pdf', pdf, 'application/pdf')}
            upload_response = requests.post(
                f'{base_url}/documents/upload',
                files=files,
                headers=headers,
                timeout=UPLOAD_TIMEOUT,
            )

        print(f'Upload response: {upload_response.status_code}')
        if upload_response.status_code != 200:
            print(f'āŒ Upload failed: {upload_response.status_code} - {upload_response.text}')
            return

        upload_result = upload_response.json()
        print('āœ… Upload successful!')
        print(f'Upload result: {json.dumps(upload_result, indent=2)}')

        # Wait for processing and indexing
        print(f'ā³ Waiting for OCR processing and indexing ({INDEXING_WAIT_SECONDS} seconds)...')
        time.sleep(INDEXING_WAIT_SECONDS)

        # Test search with meaningful queries from OCR content
        print('šŸ” Testing search with OCR-extracted content...')
        for query in SEARCH_QUERIES:
            print(f'\nšŸ”Ž Searching for: "{query}"')
            search_data = {'query': query, 'top_k': 5}
            search_response = requests.post(
                f'{base_url}/search',
                json=search_data,
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )

            # A failed query is reported but does not stop the other queries.
            if search_response.status_code != 200:
                print(f'āŒ Search failed: {search_response.status_code} - {search_response.text}')
                continue

            search_results = search_response.json()
            print(f'āœ… Search successful! Found {len(search_results)} results')
            for i, hit in enumerate(search_results):
                print(format_result(i, hit))
    except Exception as e:
        print(f'āŒ Error: {e}')


if __name__ == '__main__':
    main()