# File: railseek6/robust_document_upload.py (Python, 168 lines, 6.8 KiB)

"""
Robust document upload script with timeout handling and retries
"""
import requests
import os
import time
from typing import Optional, Dict, Any
class RobustDocumentUploader:
    """Upload local files to an HTTP server with timeouts and retries.

    Each upload is sent as a multipart POST (form field ``file``) with an
    ``X-API-Key`` header. Several candidate endpoint paths are tried in order,
    and the whole list is retried up to ``max_retries`` times with a fixed
    pause between rounds.
    """

    # Candidate upload routes, tried in this order on every retry round.
    _UPLOAD_PATHS = (
        "/documents/upload",
        "/upload",
        "/api/documents/upload",
    )

    # File extension (lower-case, with dot) -> MIME type sent for the file part.
    _CONTENT_TYPES = {
        '.pdf': 'application/pdf',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.doc': 'application/msword',
        '.txt': 'text/plain',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    }

    def __init__(self, base_url: str = "http://localhost:3015", api_key: str = "jleu1212"):
        """Store connection settings and retry policy.

        Args:
            base_url: Scheme, host and port of the server. Trailing slashes
                are stripped so endpoint paths can be appended directly.
            api_key: Value sent in the ``X-API-Key`` header on uploads.
        """
        # NOTE(review): the default API key is a literal embedded in source;
        # consider passing it explicitly or loading it from configuration.
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.default_timeout = 30  # seconds
        self.max_retries = 3
        self.retry_delay = 5  # seconds

    def upload_document(self, file_path: str, timeout: Optional[int] = None) -> Dict[str, Any]:
        """Upload a document with robust error handling and retries.

        Args:
            file_path: Path to the document file.
            timeout: Per-request timeout in seconds. ``None`` (or ``0``) falls
                back to ``self.default_timeout``.

        Returns:
            A dictionary describing the outcome. On success it contains
            ``success`` (True), ``status_code``, ``response`` (the parsed
            body), ``endpoint`` and ``retry_count`` (zero-based index of the
            round that succeeded). On failure it contains ``success`` (False),
            ``error``, ``status_code`` (None) and, when requests were
            attempted, ``retry_count`` equal to ``max_retries``.
        """
        if not os.path.exists(file_path):
            return {
                "success": False,
                "error": f"File not found: {file_path}",
                "status_code": None
            }
        # A directory (or other non-file) would only fail inside open() on
        # every endpoint and every retry round; reject it up front instead.
        if not os.path.isfile(file_path):
            return {
                "success": False,
                "error": f"Not a regular file: {file_path}",
                "status_code": None
            }

        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        print(f"📤 Uploading {file_name} ({file_size:,} bytes) to {self.base_url}")

        # Determine content type based on file extension
        content_type = self._get_content_type(file_name)

        # Try multiple endpoints
        endpoints = [f"{self.base_url}{path}" for path in self._UPLOAD_PATHS]
        headers = {"X-API-Key": self.api_key}
        timeout = timeout or self.default_timeout
        last_error = None

        for retry in range(self.max_retries):
            for endpoint in endpoints:
                try:
                    print(f" Attempt {retry + 1}/{self.max_retries}: {endpoint}")
                    # Re-open for each attempt so the stream always starts at
                    # offset 0 and is closed as soon as the request finishes.
                    with open(file_path, 'rb') as f:
                        files = {'file': (file_name, f, content_type)}
                        response = requests.post(
                            endpoint,
                            files=files,
                            headers=headers,
                            timeout=timeout
                        )
                    print(f" Status: {response.status_code}")

                    # Any 2xx status means the server accepted the upload.
                    if 200 <= response.status_code < 300:
                        print("✅ Upload successful!")
                        return {
                            "success": True,
                            "status_code": response.status_code,
                            # Body parsing must not raise here: a decode error
                            # would be mistaken for a failed upload and the
                            # file would be sent again.
                            "response": self._parse_body(response),
                            "endpoint": endpoint,
                            "retry_count": retry
                        }
                    elif response.status_code == 504:
                        print("⏰ Gateway timeout (504) - server processing may be slow")
                        last_error = f"Gateway timeout: {response.text[:200]}"
                    else:
                        print(f"❌ Upload failed: {response.status_code} - {response.text[:200]}")
                        last_error = f"HTTP {response.status_code}: {response.text[:200]}"
                except requests.exceptions.Timeout:
                    print(f"⏰ Request timeout after {timeout} seconds")
                    last_error = f"Request timeout after {timeout} seconds"
                except requests.exceptions.ConnectionError as e:
                    print(f"🔌 Connection error: {e}")
                    last_error = f"Connection error: {e}"
                except Exception as e:
                    # Best-effort loop: record the problem and move on to the
                    # next endpoint rather than aborting the whole upload.
                    print(f"❌ Unexpected error: {type(e).__name__}: {e}")
                    last_error = f"{type(e).__name__}: {e}"

            # If we've tried all endpoints and still failed, wait before retry
            if retry < self.max_retries - 1:
                print(f" Waiting {self.retry_delay} seconds before retry...")
                time.sleep(self.retry_delay)

        print("❌ All upload attempts failed")
        return {
            "success": False,
            "error": last_error or "Unknown error",
            "status_code": None,
            "retry_count": self.max_retries
        }

    @staticmethod
    def _parse_body(response: Any) -> Any:
        """Return the decoded JSON body, tolerating empty or non-JSON bodies.

        An empty body yields ``{}``. A body that is not valid JSON yields
        ``{"raw_text": <body text>}`` so callers still receive a dictionary.
        """
        if not response.text:
            return {}
        try:
            return response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses.
            return {"raw_text": response.text}

    def _get_content_type(self, filename: str) -> str:
        """Get appropriate content type based on file extension.

        The lookup is case-insensitive; unknown or missing extensions map to
        ``application/octet-stream``.
        """
        ext = os.path.splitext(filename)[1].lower()
        return self._CONTENT_TYPES.get(ext, 'application/octet-stream')

    def check_server_health(self, timeout: int = 5) -> bool:
        """Check if server is responsive.

        Sends a GET to ``<base_url>/health``.

        Args:
            timeout: Request timeout in seconds.

        Returns:
            True when the server answers with HTTP 200; False on any other
            status or when the request itself fails.
        """
        try:
            response = requests.get(f"{self.base_url}/health", timeout=timeout)
        except requests.exceptions.RequestException:
            # Narrower than a bare except so Ctrl-C / SystemExit still propagate.
            return False
        return response.status_code == 200
def upload_tir_docx(file_path: str = "test/tir.docx", timeout: int = 60):
    """Specific function to upload tir.docx with robust handling.

    Probes the server's health endpoint, uploads the file regardless of the
    probe result, and prints a human-readable summary of the outcome.

    Args:
        file_path: Document to upload; defaults to ``test/tir.docx``
            (resolved relative to the current working directory).
        timeout: Per-request timeout in seconds passed to the uploader.

    Returns:
        The result dictionary produced by
        ``RobustDocumentUploader.upload_document``.
    """
    uploader = RobustDocumentUploader()

    # Check server health first
    print("🔍 Checking server health...")
    if not uploader.check_server_health():
        # The probe is advisory only; the upload is still attempted.
        print("⚠️ Server may not be responding. Trying upload anyway...")

    # Upload the document
    result = uploader.upload_document(file_path, timeout=timeout)

    if result["success"]:
        print("\n🎉 Upload completed successfully!")
        print(f" Endpoint: {result.get('endpoint')}")
        print(f" Retries: {result.get('retry_count')}")
        body = result.get("response")
        # The parsed body can be any JSON type (e.g. a list); only a
        # dictionary carries the optional message / track_id fields.
        if isinstance(body, dict) and body:
            print(f" Message: {body.get('message', 'N/A')}")
            if "track_id" in body:
                print(f" Track ID: {body['track_id']}")
    else:
        print(f"\n❌ Upload failed: {result.get('error')}")
        print("\n💡 Suggestions:")
        print(" 1. Check if the server is running (port 3015)")
        print(f" 2. Try increasing the timeout (currently {timeout} seconds)")
        print(" 3. Check server logs for processing errors")
        print(" 4. Try with a smaller file first")

    return result
if __name__ == "__main__":
    # Run the sample upload only when executed as a script, not on import.
    upload_tir_docx()