import requests
import json
import time

# (connect, read) timeout in seconds applied to every HTTP call. Without an
# explicit timeout `requests` waits indefinitely on an unresponsive server.
REQUEST_TIMEOUT = (10, 120)


def login_and_get_token(base_url):
    """Log in to the server and build the authorization headers.

    Sends the credentials as a form-encoded POST to ``{base_url}/login`` and
    reads ``access_token`` from the JSON response.

    Args:
        base_url: Root URL of the server (endpoint paths are appended to it).

    Returns:
        ``{"Authorization": "Bearer <token>"}`` on success, or ``None`` when
        the request fails, the status is not 200, or the response carries no
        usable ``access_token``.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration if this script is used outside a local test setup.
    login_data = {
        "username": "jleu3482",
        "password": "jleu1212",
    }

    print("Logging in...")
    try:
        login_response = requests.post(
            f"{base_url}/login",
            data=login_data,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as e:
        # Connection refused, DNS failure, timeout, ... -> report, don't crash.
        print(f"❌ Login failed: {e}")
        return None

    if login_response.status_code != 200:
        print(f"❌ Login failed: {login_response.status_code}")
        print(f"Response: {login_response.text}")
        return None

    try:
        token = login_response.json().get("access_token")
    except (ValueError, AttributeError):
        # Body is not JSON, or the JSON document is not an object.
        token = None

    if not token:
        # A 200 without a token must not be reported as a successful login,
        # otherwise every later request would be sent with "Bearer None".
        print("❌ Login failed: no access_token in response")
        print(f"Response: {login_response.text}")
        return None

    print("✅ Login successful")
    return {"Authorization": f"Bearer {token}"}


def _check_health(base_url, headers):
    """Query ``/health``, print the reported configuration, return True if the status is 200."""
    print("\nTesting server health...")
    try:
        health_response = requests.get(
            f"{base_url}/health",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as e:
        print(f"❌ Server health check failed: {e}")
        return False

    if health_response.status_code != 200:
        print(f"❌ Server health check failed: {health_response.status_code}")
        return False

    print("✅ Server is healthy")

    try:
        health_data = health_response.json()
    except ValueError:
        health_data = {}

    # Tolerate a missing or malformed "configuration" section instead of
    # raising KeyError/TypeError in the middle of the report.
    config = health_data.get("configuration") if isinstance(health_data, dict) else None
    if not isinstance(config, dict):
        config = {}

    llm_host = config.get("llm_binding_host")
    print(f"LLM Binding: {config.get('llm_binding')}")
    print(f"LLM Host: {llm_host}")
    print(f"Embedding Model: {config.get('embedding_model')}")

    # Check if DeepSeek configuration is correct
    if isinstance(llm_host, str) and "deepseek.com" in llm_host:
        print("✅ DeepSeek API configuration is correct")
    else:
        print("❌ DeepSeek API configuration is incorrect")
    return True


def _run_search(base_url, headers):
    """POST a fixed query to ``/search`` and print score, content preview and source of each hit."""
    print("\n=== Testing Search with OCR Content ===")
    search_data = {
        "query": "optical character recognition",
        "top_k": 3,
    }

    # Deliberately broad: this is a best-effort smoke test, so any failure in
    # the request or while formatting a result is reported and the run goes on.
    try:
        search_response = requests.post(
            f"{base_url}/search",
            json=search_data,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        print(f"Search response status: {search_response.status_code}")

        if search_response.status_code == 200:
            results = search_response.json().get('results', [])
            print("✅ Search successful!")
            print(f"Found {len(results)} results")

            # Display results
            for i, result in enumerate(results):
                print(f"\nResult {i+1}:")
                print(f"  Score: {result.get('score', 0):.4f}")
                print(f"  Content: {result.get('content', '')[:200]}...")
                print(f"  Source: {result.get('source', '')}")
        else:
            print(f"Search error: {search_response.text}")
    except Exception as e:
        print(f"Search request failed: {e}")


def _list_documents(base_url, headers):
    """GET ``/documents`` and print the name and status of every listed document."""
    print("\n=== Testing Document List ===")

    # Broad catch for the same best-effort reason as in the search step.
    try:
        docs_response = requests.get(
            f"{base_url}/documents",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if docs_response.status_code == 200:
            documents = docs_response.json().get('documents', [])
            print(f"Found {len(documents)} documents")
            for doc in documents:
                print(f"  - {doc.get('name', '')} (Status: {doc.get('status', '')})")
        else:
            print(f"Documents list failed: {docs_response.status_code}")
    except Exception as e:
        print(f"Documents request failed: {e}")


def test_ocr_upload_and_search():
    """Smoke-test the server: log in, check health, run a search, list documents.

    The steps run against ``http://localhost:3015`` and report their outcome
    on stdout. The run stops early when login or the health check fails; the
    search and document-list steps each report their own errors and do not
    abort the run. No file is uploaded by this function; it only queries
    content that is already on the server.

    Returns:
        None.
    """
    base_url = "http://localhost:3015"

    # Get authentication token
    headers = login_and_get_token(base_url)
    if not headers:
        return

    # Test server health
    if not _check_health(base_url, headers):
        return

    # Test search with OCR content
    _run_search(base_url, headers)

    # Test document list to verify OCR PDF was processed
    _list_documents(base_url, headers)


if __name__ == "__main__":
    test_ocr_upload_and_search()