"""Manual end-to-end check of the document server's OCR upload flow.

The script logs in with a username/password form, clears the existing
documents, uploads ``OCR_PDF_PATH``, waits for processing, lists the
documents and finally issues one search query.  Progress is reported on
stdout; it needs a server listening at ``BASE_URL``.
"""
import json
import time
from pathlib import Path

import requests

# Configuration
BASE_URL = "http://localhost:3015"
# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment if this file is shared outside a local setup.
USERNAME = "jleu3482"
PASSWORD = "jleu1212"
OCR_PDF_PATH = "ocr.pdf"

# (connect, read) timeouts in seconds.  Without a timeout ``requests`` waits
# forever on an unresponsive server; the read timeout is generous because
# upload and query may take a while on the server side.
REQUEST_TIMEOUT = (10, 300)

# Seconds to wait after a successful upload before checking document status.
PROCESSING_WAIT_SECONDS = 10

# Failures the HTTP steps are expected to hit: transport/HTTP errors and a
# response body that is not valid JSON (``Response.json()`` raises a
# ``ValueError`` subclass).
_HTTP_ERRORS = (requests.RequestException, ValueError)


def _login():
    """Post the credentials to ``/login`` and return the access token.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the request fails, the status is not 200, or no token is present.
    """
    login_data = {
        "username": USERNAME,
        "password": PASSWORD,
        "grant_type": "password",
    }
    try:
        login_response = requests.post(
            f"{BASE_URL}/login", data=login_data, timeout=REQUEST_TIMEOUT
        )
        print(f"Login response status: {login_response.status_code}")
        if login_response.status_code != 200:
            print(f"✗ Login failed: {login_response.status_code} - {login_response.text}")
            return None
        # The body of a successful login is deliberately not echoed: it
        # contains the bearer token.
        token_data = login_response.json()
    except _HTTP_ERRORS as e:
        print(f"✗ Login error: {e}")
        return None

    access_token = token_data.get("access_token") if isinstance(token_data, dict) else None
    if not access_token:
        # Fail here rather than sending "Bearer None" on every later request.
        print("✗ Login failed: response did not contain an access token")
        return None

    print("✓ Login successful! Access token obtained")
    return access_token


def _clear_documents(headers):
    """Best-effort ``DELETE /documents``; problems are reported as warnings only."""
    try:
        clear_response = requests.delete(
            f"{BASE_URL}/documents", headers=headers, timeout=REQUEST_TIMEOUT
        )
        if clear_response.status_code == 200:
            clear_result = clear_response.json()
            print(f"✓ Documents cleared successfully: {clear_result}")
        else:
            print(f"⚠ Clear documents response: {clear_response.status_code} - {clear_response.text}")
    except _HTTP_ERRORS as e:
        print(f"⚠ Clear documents error: {e}")


def _upload_pdf(headers):
    """Upload ``OCR_PDF_PATH`` to ``/documents/upload``; return True on HTTP 200."""
    pdf_path = Path(OCR_PDF_PATH)
    if not pdf_path.exists():
        print(f"✗ OCR PDF file not found: {OCR_PDF_PATH}")
        return False

    try:
        # The context manager closes the handle on every path, including
        # when the request itself raises.
        with pdf_path.open("rb") as pdf_file:
            files = {"file": (OCR_PDF_PATH, pdf_file, "application/pdf")}
            upload_response = requests.post(
                f"{BASE_URL}/documents/upload",
                files=files,
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
        print(f"Upload response status: {upload_response.status_code}")
        print(f"Upload response text: {upload_response.text}")
        if upload_response.status_code != 200:
            print(f"✗ Upload failed: {upload_response.status_code} - {upload_response.text}")
            return False
        upload_result = upload_response.json()
    except (requests.RequestException, OSError, ValueError) as e:
        # OSError covers failures opening or reading the PDF itself.
        print(f"✗ Upload error: {e}")
        return False

    print(f"✓ Upload successful! Result: {json.dumps(upload_result, indent=2)}")
    return True


def _report_document_status(headers):
    """Best-effort ``GET /documents`` that prints the returned JSON."""
    try:
        status_response = requests.get(
            f"{BASE_URL}/documents", headers=headers, timeout=REQUEST_TIMEOUT
        )
        if status_response.status_code == 200:
            documents = status_response.json()
            print(f"✓ Documents status: {json.dumps(documents, indent=2)}")
        else:
            print(f"⚠ Documents status check failed: {status_response.status_code}")
    except _HTTP_ERRORS as e:
        print(f"⚠ Documents status check error: {e}")


def _run_search(headers):
    """Best-effort ``POST /query`` with a fixed sample query; prints the result."""
    search_data = {
        "query": "safety precautions",
        "top_k": 5,
    }
    try:
        search_response = requests.post(
            f"{BASE_URL}/query", json=search_data, headers=headers, timeout=REQUEST_TIMEOUT
        )
        if search_response.status_code == 200:
            search_results = search_response.json()
            print(f"✓ Search successful! Response: {json.dumps(search_results, indent=2)}")
        else:
            print(f"⚠ Search failed: {search_response.status_code} - {search_response.text}")
    except _HTTP_ERRORS as e:
        print(f"⚠ Search error: {e}")


def test_ocr_upload_with_oauth():
    """Run the login -> clear -> upload -> status -> search sequence.

    Login and upload are mandatory: a failure in either aborts the run.
    Clearing documents, the status check and the search only emit warnings
    when they fail.

    Returns:
        True if login and upload both succeeded, False otherwise.
    """
    print("Testing OCR PDF upload with OAuth2 authentication...")
    print(f"Username: {USERNAME}")
    # Mask the secret instead of echoing it to the console or CI logs.
    print(f"Password: {'*' * len(PASSWORD)}")
    print(f"OCR PDF: {OCR_PDF_PATH}")

    # Step 1: Login and get access token
    print("\n1. Logging in...")
    access_token = _login()
    if access_token is None:
        return False

    # Step 2: Clear existing documents first
    print("\n2. Clearing existing documents...")
    headers = {"Authorization": f"Bearer {access_token}"}
    _clear_documents(headers)

    # Step 3: Upload OCR PDF
    print("\n3. Uploading OCR PDF...")
    if not _upload_pdf(headers):
        return False

    # Step 4: Wait for processing and check document status
    print("\n4. Waiting for document processing...")
    time.sleep(PROCESSING_WAIT_SECONDS)  # Give more time for OCR processing
    _report_document_status(headers)

    # Step 5: Test search functionality
    print("\n5. Testing search functionality...")
    _run_search(headers)

    return True


if __name__ == "__main__":
    success = test_ocr_upload_with_oauth()
    if success:
        print("\n🎉 OCR upload test with OAuth2 authentication completed successfully!")
    else:
        print("\n❌ OCR upload test with OAuth2 authentication failed!")