"""
Robust document upload script with timeout handling and retries.
"""
|
|
import requests
|
|
import os
|
|
import time
|
|
from typing import Optional, Dict, Any
|
|
|
|
class RobustDocumentUploader:
    """Upload a file to an HTTP service, retrying across several candidate endpoints.

    Each retry round posts the file to every path in ``_UPLOAD_PATHS`` (in
    order) under ``base_url`` and stops at the first 2xx response. Progress is
    reported with ``print``; results are returned as plain dictionaries rather
    than raised as exceptions.
    """

    # Upload paths tried, in order, during every retry round.
    _UPLOAD_PATHS = ("/documents/upload", "/upload", "/api/documents/upload")

    # File extension (lower-case, with dot) -> MIME type sent with the upload.
    _CONTENT_TYPES = {
        ".pdf": "application/pdf",
        ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".doc": "application/msword",
        ".txt": "text/plain",
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }

    def __init__(self, base_url: str = "http://localhost:3015", api_key: str = "jleu1212"):
        """Store connection settings and retry policy.

        Args:
            base_url: Root URL of the server; a trailing slash is tolerated.
            api_key: Value sent in the ``X-API-Key`` header of upload requests.
        """
        # NOTE(review): the default API key is a credential hard-coded in
        # source. It is kept so existing callers keep working, but it should
        # be supplied by the caller or configuration instead.
        self.base_url = base_url
        self.api_key = api_key
        self.default_timeout = 30  # seconds
        self.max_retries = 3
        self.retry_delay = 5  # seconds

    def _build_url(self, path: str) -> str:
        """Join ``base_url`` and an absolute ``path`` without doubling the slash."""
        return f"{self.base_url.rstrip('/')}{path}"

    @staticmethod
    def _parse_json_body(response) -> Any:
        """Return the decoded JSON body, or ``{}`` if it is empty or not JSON.

        A successful upload whose body is not JSON (e.g. plain ``OK``) must not
        be reported as a failure, otherwise the file would be uploaded again.
        """
        if not response.text:
            return {}
        try:
            return response.json()
        except ValueError:  # requests' JSON decode errors derive from ValueError
            return {}

    def upload_document(self, file_path: str, timeout: Optional[int] = None) -> Dict[str, Any]:
        """Upload a document with error handling and retries.

        Args:
            file_path: Path to the document file.
            timeout: Per-request timeout in seconds; a falsy value selects
                ``self.default_timeout``.

        Returns:
            On success: ``success`` (True), ``status_code``, ``response``
            (decoded JSON body, or ``{}``), ``endpoint`` and ``retry_count``
            (zero-based round in which the upload succeeded).
            On failure: ``success`` (False), ``error``, ``status_code`` (HTTP
            status of the last failed attempt, or None if it never produced a
            response) and, once attempts were made, ``retry_count``.
        """
        if not os.path.exists(file_path):
            return {
                "success": False,
                "error": f"File not found: {file_path}",
                "status_code": None,
            }
        if not os.path.isfile(file_path):
            # A directory would otherwise fail in open() on every attempt and
            # sit through all the retry delays before reporting the problem.
            return {
                "success": False,
                "error": f"Not a regular file: {file_path}",
                "status_code": None,
            }

        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        print(f"📤 Uploading {file_name} ({file_size:,} bytes) to {self.base_url}")

        # Determine content type based on file extension
        content_type = self._get_content_type(file_name)
        endpoints = [self._build_url(path) for path in self._UPLOAD_PATHS]
        headers = {"X-API-Key": self.api_key}
        timeout = timeout or self.default_timeout

        last_error = None
        last_status = None

        for retry in range(self.max_retries):
            for endpoint in endpoints:
                print(f" Attempt {retry + 1}/{self.max_retries}: {endpoint}")
                try:
                    # Reopen for every attempt so each POST reads from offset 0.
                    with open(file_path, "rb") as f:
                        response = requests.post(
                            endpoint,
                            files={"file": (file_name, f, content_type)},
                            headers=headers,
                            timeout=timeout,
                        )
                except requests.exceptions.Timeout:
                    print(f"⏰ Request timeout after {timeout} seconds")
                    last_error = f"Request timeout after {timeout} seconds"
                    last_status = None
                    continue
                except requests.exceptions.ConnectionError as e:
                    print(f"🔌 Connection error: {e}")
                    last_error = f"Connection error: {e}"
                    last_status = None
                    continue
                except Exception as e:
                    # Deliberately broad: any single failed attempt should fall
                    # through to the next endpoint/retry instead of aborting.
                    print(f"❌ Unexpected error: {type(e).__name__}: {e}")
                    last_error = f"{type(e).__name__}: {e}"
                    last_status = None
                    continue

                print(f" Status: {response.status_code}")

                # Any 2xx means the server accepted the file; treating 201/202
                # as failures would re-upload the same document elsewhere.
                if 200 <= response.status_code < 300:
                    print("✅ Upload successful!")
                    return {
                        "success": True,
                        "status_code": response.status_code,
                        "response": self._parse_json_body(response),
                        "endpoint": endpoint,
                        "retry_count": retry,
                    }

                if response.status_code == 504:
                    print("⏰ Gateway timeout (504) - server processing may be slow")
                    last_error = f"Gateway timeout: {response.text[:200]}"
                else:
                    print(f"❌ Upload failed: {response.status_code} - {response.text[:200]}")
                    last_error = f"HTTP {response.status_code}: {response.text[:200]}"
                last_status = response.status_code

            # If we've tried all endpoints and still failed, wait before retry
            if retry < self.max_retries - 1:
                print(f" Waiting {self.retry_delay} seconds before retry...")
                time.sleep(self.retry_delay)

        print("❌ All upload attempts failed")
        return {
            "success": False,
            "error": last_error or "Unknown error",
            "status_code": last_status,
            "retry_count": self.max_retries,
        }

    def _get_content_type(self, filename: str) -> str:
        """Return the MIME type for ``filename`` based on its extension.

        The lookup is case-insensitive; unknown or missing extensions map to
        ``application/octet-stream``.
        """
        ext = os.path.splitext(filename)[1].lower()
        return self._CONTENT_TYPES.get(ext, "application/octet-stream")

    def check_server_health(self, timeout: int = 5) -> bool:
        """Return True if ``GET <base_url>/health`` answers with HTTP 200.

        Request-level failures (connection refused, timeout, invalid URL, ...)
        are reported as False. Only ``requests`` errors are caught, so Ctrl-C
        and interpreter shutdown are no longer swallowed.
        """
        try:
            response = requests.get(self._build_url("/health"), timeout=timeout)
        except requests.exceptions.RequestException:
            return False
        return response.status_code == 200
|
|
|
|
def upload_tir_docx(file_path: str = "test/tir.docx", timeout: int = 60) -> Dict[str, Any]:
    """Upload a document (``test/tir.docx`` by default) and print a summary.

    Runs a health check first, but attempts the upload even if the check
    fails. On success prints the endpoint, retry count and, when the server
    replied with a JSON object, its ``message`` and ``track_id`` fields. On
    failure prints the error and troubleshooting suggestions.

    Args:
        file_path: Path of the file to upload.
        timeout: Per-request timeout in seconds passed to the uploader.

    Returns:
        The result dictionary returned by
        ``RobustDocumentUploader.upload_document``.
    """
    uploader = RobustDocumentUploader()

    # Check server health first
    print("🔍 Checking server health...")
    if not uploader.check_server_health():
        print("⚠️ Server may not be responding. Trying upload anyway...")

    # Upload the document
    result = uploader.upload_document(file_path, timeout=timeout)

    if result["success"]:
        print("\n🎉 Upload completed successfully!")
        print(f" Endpoint: {result.get('endpoint')}")
        print(f" Retries: {result.get('retry_count')}")
        payload = result.get("response")
        # The server may answer with a JSON list or scalar; only a JSON
        # object has the .get()/membership semantics used below.
        if isinstance(payload, dict) and payload:
            print(f" Message: {payload.get('message', 'N/A')}")
            if "track_id" in payload:
                print(f" Track ID: {payload['track_id']}")
    else:
        print(f"\n❌ Upload failed: {result.get('error')}")
        print("\n💡 Suggestions:")
        print(" 1. Check if the server is running (port 3015)")
        print(f" 2. Try increasing the timeout (currently {timeout} seconds)")
        print(" 3. Check server logs for processing errors")
        print(" 4. Try with a smaller file first")

    return result
|
|
|
|
# Script entry point: perform the sample upload only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    upload_tir_docx()