""" Robust document upload script with timeout handling and retries """ import requests import os import time from typing import Optional, Dict, Any class RobustDocumentUploader: def __init__(self, base_url: str = "http://localhost:3015", api_key: str = "jleu1212"): self.base_url = base_url self.api_key = api_key self.default_timeout = 30 # seconds self.max_retries = 3 self.retry_delay = 5 # seconds def upload_document(self, file_path: str, timeout: Optional[int] = None) -> Dict[str, Any]: """ Upload a document with robust error handling and retries Args: file_path: Path to the document file timeout: Timeout in seconds (default: 30) Returns: Dictionary with upload results """ if not os.path.exists(file_path): return { "success": False, "error": f"File not found: {file_path}", "status_code": None } file_name = os.path.basename(file_path) file_size = os.path.getsize(file_path) print(f"šŸ“¤ Uploading {file_name} ({file_size:,} bytes) to {self.base_url}") # Determine content type based on file extension content_type = self._get_content_type(file_name) # Try multiple endpoints endpoints = [ f"{self.base_url}/documents/upload", f"{self.base_url}/upload", f"{self.base_url}/api/documents/upload" ] headers = {"X-API-Key": self.api_key} timeout = timeout or self.default_timeout last_error = None for retry in range(self.max_retries): for endpoint in endpoints: try: print(f" Attempt {retry + 1}/{self.max_retries}: {endpoint}") with open(file_path, 'rb') as f: files = {'file': (file_name, f, content_type)} response = requests.post( endpoint, files=files, headers=headers, timeout=timeout ) print(f" Status: {response.status_code}") if response.status_code == 200: print(f"āœ… Upload successful!") return { "success": True, "status_code": response.status_code, "response": response.json() if response.text else {}, "endpoint": endpoint, "retry_count": retry } elif response.status_code == 504: print(f"ā° Gateway timeout (504) - server processing may be slow") last_error = f"Gateway timeout: {response.text[:200]}" else: print(f"āŒ Upload failed: {response.status_code} - {response.text[:200]}") last_error = f"HTTP {response.status_code}: {response.text[:200]}" except requests.exceptions.Timeout: print(f"ā° Request timeout after {timeout} seconds") last_error = f"Request timeout after {timeout} seconds" except requests.exceptions.ConnectionError as e: print(f"šŸ”Œ Connection error: {e}") last_error = f"Connection error: {e}" except Exception as e: print(f"āŒ Unexpected error: {type(e).__name__}: {e}") last_error = f"{type(e).__name__}: {e}" # If we've tried all endpoints and still failed, wait before retry if retry < self.max_retries - 1: print(f" Waiting {self.retry_delay} seconds before retry...") time.sleep(self.retry_delay) print(f"āŒ All upload attempts failed") return { "success": False, "error": last_error or "Unknown error", "status_code": None, "retry_count": self.max_retries } def _get_content_type(self, filename: str) -> str: """Get appropriate content type based on file extension""" ext = os.path.splitext(filename)[1].lower() content_types = { '.pdf': 'application/pdf', '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', '.doc': 'application/msword', '.txt': 'text/plain', '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' } return content_types.get(ext, 'application/octet-stream') def check_server_health(self, timeout: int = 5) -> bool: """Check if server is responsive""" try: response = requests.get(f"{self.base_url}/health", timeout=timeout) return response.status_code == 200 except: return False def upload_tir_docx(): """Specific function to upload tir.docx with robust handling""" uploader = RobustDocumentUploader() # Check server health first print("šŸ” Checking server health...") if not uploader.check_server_health(): print("āš ļø Server may not be responding. Trying upload anyway...") # Upload the document result = uploader.upload_document("test/tir.docx", timeout=60) if result["success"]: print(f"\nšŸŽ‰ Upload completed successfully!") print(f" Endpoint: {result.get('endpoint')}") print(f" Retries: {result.get('retry_count')}") if "response" in result and result["response"]: print(f" Message: {result['response'].get('message', 'N/A')}") if "track_id" in result["response"]: print(f" Track ID: {result['response']['track_id']}") else: print(f"\nāŒ Upload failed: {result.get('error')}") print("\nšŸ’” Suggestions:") print(" 1. Check if the server is running (port 3015)") print(" 2. Try increasing the timeout (currently 60 seconds)") print(" 3. Check server logs for processing errors") print(" 4. Try with a smaller file first") return result if __name__ == "__main__": upload_tir_docx()