"""End-to-end smoke test for the OCR PDF workflow on an auth-disabled server.

The script talks to a running server over HTTP and walks through six steps:
health check, auth-status check, PDF upload, indexing wait, search, and a
RAG query. Run it directly; the process exits with status 1 if any step
fails.
"""

import json
import os
import time

import requests

DEFAULT_BASE_URL = "http://localhost:3015"
DEFAULT_PDF_PATH = "ocr.pdf"
# Seconds allowed for any single HTTP request. Without an explicit timeout
# `requests` waits forever on a stalled server.
DEFAULT_REQUEST_TIMEOUT = 120
# Indexing poll budget and cadence, in seconds.
INDEXING_MAX_WAIT = 120
INDEXING_POLL_INTERVAL = 5


def _check_server_status(base_url, timeout):
    """Step 1: return True when ``GET /health`` answers with HTTP 200."""
    print("\n1. Testing server status...")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        if response.status_code != 200:
            print(f"✗ Server returned status: {response.status_code}")
            return False
        status_data = response.json()
        print(f"✓ Server is running - Status: {status_data.get('status')}")
        print(f" Auth mode: {status_data.get('auth_mode')}")
        return True
    except Exception as e:  # step boundary: report any failure, never crash
        print(f"✗ Cannot connect to server: {e}")
        return False


def _check_auth_disabled(base_url, timeout):
    """Step 2: return True when ``GET /auth-status`` reports auth is off."""
    print("\n2. Testing authentication status...")
    try:
        response = requests.get(f"{base_url}/auth-status", timeout=timeout)
        if response.status_code != 200:
            print(f"✗ Auth status check failed: {response.status_code}")
            return False
        auth_data = response.json()
        print(f"✓ Auth status: {auth_data.get('auth_configured')}")
        print(f" Auth mode: {auth_data.get('auth_mode')}")
        if auth_data.get('auth_configured'):
            print("✗ Authentication is still enabled!")
            return False
        print("✓ Authentication is disabled - guest access enabled")
        return True
    except Exception as e:  # step boundary: report any failure, never crash
        print(f"✗ Auth status check failed: {e}")
        return False


def _upload_pdf(base_url, pdf_path, timeout):
    """Step 3: POST the PDF at *pdf_path* to ``/documents``; True on HTTP 200."""
    print("\n3. Uploading OCR PDF file...")
    try:
        # The request must be sent while the file is still open, because
        # `requests` reads the upload body from the file handle.
        with open(pdf_path, "rb") as file:
            files = {
                "file": (os.path.basename(pdf_path), file, "application/pdf"),
            }
            response = requests.post(
                f"{base_url}/documents", files=files, timeout=timeout
            )
        if response.status_code != 200:
            print(f"✗ Upload failed: {response.status_code} - {response.text}")
            return False
        upload_data = response.json()
        print("✓ File uploaded successfully")
        print(f" Document ID: {upload_data.get('document_id')}")
        print(f" Status: {upload_data.get('status')}")
        return True
    except Exception as e:  # covers a missing file as well as HTTP errors
        print(f"✗ Upload failed: {e}")
        return False


def _wait_for_indexing(base_url, timeout,
                       max_wait_time=INDEXING_MAX_WAIT,
                       wait_interval=INDEXING_POLL_INTERVAL):
    """Step 4: poll ``GET /documents`` until the first entry finishes indexing.

    Returns True once the first listed document reports status
    ``"completed"``, and False when it reports ``"failed"`` or when
    *max_wait_time* seconds of wall-clock time have passed. Elapsed time is
    measured with a monotonic clock, so slow requests count against the
    budget (the overall wait can exceed it by at most one request timeout).

    NOTE(review): this inspects only the first document returned and assumes
    that entry is the file just uploaded -- confirm the endpoint's ordering.
    """
    print("\n4. Monitoring indexing progress...")
    started = time.monotonic()
    while True:
        elapsed_time = int(time.monotonic() - started)
        if elapsed_time >= max_wait_time:
            print("✗ Indexing timeout reached")
            return False
        try:
            response = requests.get(f"{base_url}/documents", timeout=timeout)
            if response.status_code == 200:
                docs_data = response.json()
                if docs_data:
                    latest_doc = docs_data[0]
                    status = latest_doc.get('status')
                    print(f" Current status: {status} (waited {elapsed_time}s)")
                    if status == "completed":
                        print("✓ Indexing completed successfully!")
                        return True
                    if status == "failed":
                        print("✗ Indexing failed!")
                        return False
                else:
                    print(" No documents found")
            else:
                print(f" Failed to get document status: {response.status_code}")
        except Exception as e:  # transient errors: log and keep polling
            print(f" Error checking status: {e}")
        time.sleep(wait_interval)


def _check_search(base_url, timeout):
    """Step 5: POST a fixed query to ``/search``; True on HTTP 200."""
    print("\n5. Testing search functionality...")
    try:
        search_data = {
            "query": "document text content",
            "top_k": 5,
        }
        response = requests.post(
            f"{base_url}/search", json=search_data, timeout=timeout
        )
        if response.status_code != 200:
            print(f"✗ Search failed: {response.status_code} - {response.text}")
            return False
        search_results = response.json()
        print("✓ Search successful")
        print(f" Found {len(search_results.get('results', []))} results")
        # Show the top hit when there is one; an empty result set is not
        # treated as a failure.
        if search_results.get('results'):
            first_result = search_results['results'][0]
            print(f" First result score: {first_result.get('score')}")
            print(f" First result content preview: {first_result.get('content', '')[:100]}...")
        else:
            print(" No search results returned")
        return True
    except Exception as e:  # step boundary: report any failure, never crash
        print(f"✗ Search test failed: {e}")
        return False


def _check_rag_query(base_url, timeout):
    """Step 6: POST a fixed question to ``/query``; True on HTTP 200."""
    print("\n6. Testing RAG query functionality...")
    try:
        query_data = {
            "query": "What is this document about?",
            "top_k": 3,
        }
        response = requests.post(
            f"{base_url}/query", json=query_data, timeout=timeout
        )
        if response.status_code != 200:
            print(f"✗ Query failed: {response.status_code} - {response.text}")
            return False
        query_result = response.json()
        print("✓ Query successful")
        print(f" Response: {query_result.get('response', '')[:200]}...")
        print(f" Sources: {len(query_result.get('sources', []))}")
        return True
    except Exception as e:  # step boundary: report any failure, never crash
        print(f"✗ Query test failed: {e}")
        return False


def test_ocr_upload_workflow(base_url=DEFAULT_BASE_URL,
                             pdf_path=DEFAULT_PDF_PATH,
                             request_timeout=DEFAULT_REQUEST_TIMEOUT):
    """Test OCR PDF upload, indexing, and search without authentication.

    Runs the six workflow steps in order against the server at *base_url*
    and stops at the first failure. Progress is reported on stdout.

    Args:
        base_url: Root URL of the server under test (no trailing slash).
        pdf_path: Path of the PDF file to upload.
        request_timeout: Seconds allowed for each individual HTTP request.

    Returns:
        True when every step succeeds, False as soon as one fails.
    """
    print("Testing OCR PDF upload workflow without authentication...")

    steps = (
        (_check_server_status, (base_url, request_timeout)),
        (_check_auth_disabled, (base_url, request_timeout)),
        (_upload_pdf, (base_url, pdf_path, request_timeout)),
        (_wait_for_indexing, (base_url, request_timeout)),
        (_check_search, (base_url, request_timeout)),
        (_check_rag_query, (base_url, request_timeout)),
    )
    for step, args in steps:
        if not step(*args):
            return False

    print("\n🎉 All tests passed! OCR PDF upload, indexing, and search workflow is working correctly without authentication.")
    return True


if __name__ == "__main__":
    success = test_ocr_upload_workflow()
    if not success:
        print("\n❌ Some tests failed. Check the server status and configuration.")
        # SystemExit works even where the site-provided exit() builtin is
        # unavailable (e.g. `python -S`).
        raise SystemExit(1)