"""
Analyze the server-side processing issue by examining the actual server logs
and behavior.
"""

import json
import time
from collections import deque

import requests

# Defaults reproduce the values that were previously hard-coded in the script.
_DEFAULT_BASE_URL = "http://localhost:3015"
_DEFAULT_LOG_PATH = "lightrag.log"
_DEFAULT_TIMEOUT = 10.0  # seconds; without a timeout `requests` can block forever
_LOG_TAIL_LINES = 50
_LOG_KEYWORDS = ("ocr", "pdf", "whitespace")


def _check_server_health(base_url: str, timeout: float) -> bool:
    """Query ``<base_url>/health`` and print the outcome.

    Returns:
        ``False`` only when the request itself fails (server unreachable,
        timeout, ...). A non-200 answer is reported but still returns ``True``
        so the remaining checks run.
    """
    try:
        print("\n📡 Checking server status...")
        response = requests.get(f"{base_url}/health", timeout=timeout)
        print(f" Server health: {response.status_code}")
        if response.status_code == 200:
            print(" ✅ Server is running")
        else:
            print(" ❌ Server health check failed")
    except Exception as e:  # best-effort diagnostic: report and stop
        print(f" ❌ Cannot connect to server: {e}")
        return False
    return True


def _report_document_status(
    base_url: str, username: str, password: str, timeout: float
) -> None:
    """Log in, fetch ``<base_url>/documents`` and print documents per status.

    The response is expected to carry a ``"statuses"`` mapping of status name
    to a list of document dicts; for each document the ``file_path`` and
    ``error_msg`` entries are printed. Any failure is printed, never raised.
    """
    try:
        print("\n📊 Checking current document status...")

        # Login first to get a bearer token (form-encoded credentials).
        login_response = requests.post(
            f"{base_url}/auth/login",
            data={"username": username, "password": password},
            timeout=timeout,
        )
        if login_response.status_code != 200:
            print(f" ❌ Login failed: {login_response.status_code}")
            return

        token = login_response.json().get("access_token")
        if not token:
            # Avoid sending a meaningless "Bearer None" header.
            print(" ❌ Login response did not contain an access_token")
            return

        docs_response = requests.get(
            f"{base_url}/documents",
            headers={"Authorization": f"Bearer {token}"},
            timeout=timeout,
        )
        if docs_response.status_code != 200:
            print(f" ❌ Failed to get documents: {docs_response.status_code}")
            return

        docs_data = docs_response.json()
        print(" Current document status:")
        for status, docs in docs_data.get("statuses", {}).items():
            print(f" {status}: {len(docs)} documents")
            for doc in docs:
                print(f" - {doc.get('file_path')}: {doc.get('error_msg', 'No error')}")
    except Exception as e:  # best-effort diagnostic: payload shape is not guaranteed
        print(f" ❌ Error checking document status: {e}")


def _show_recent_logs(log_path: str) -> None:
    """Print OCR/PDF/whitespace-related lines from the tail of the log file.

    Only the last ``_LOG_TAIL_LINES`` lines are inspected. The file is streamed
    through a bounded ``deque`` so a large log is never held in memory at once.
    """
    print("\n📋 Checking for recent server logs...")
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            recent_logs = deque(f, maxlen=_LOG_TAIL_LINES)

        print(" Recent server logs:")
        for line in recent_logs:
            lowered = line.lower()
            if any(keyword in lowered for keyword in _LOG_KEYWORDS):
                print(f" {line.strip()}")
    except FileNotFoundError:
        print(f" ❌ {log_path} file not found")
    except Exception as e:
        print(f" ❌ Error reading logs: {e}")


def _print_findings() -> None:
    """Print the root-cause notes and suggested remedies.

    These are static notes: they are printed unconditionally and are not
    derived from the checks performed by this script.
    """
    print("\n🔬 Root Cause Analysis:")
    print(" 1. ✅ Direct document processor works correctly (extracts 1516 characters)")
    print(" 2. ✅ OCR engine is properly initialized with GPU")
    print(" 3. ❌ Server upload fails with 'File content contains only whitespace characters'")
    print(" 4. ❌ Server is using cached/old document processor code")
    print(" 5. ❌ Server restart didn't load updated document_processor.py")

    print("\n💡 Possible Solutions:")
    print(" - Kill all Python processes and restart server")
    print(" - Check if server is using a different virtual environment")
    print(" - Verify the document_processor.py file loaded by server")
    print(" - Add debug logging to server's document processing pipeline")


def analyze_server_issue(
    *,
    base_url: str = _DEFAULT_BASE_URL,
    log_path: str = _DEFAULT_LOG_PATH,
    username: str = "admin",
    password: str = "password",
    timeout: float = _DEFAULT_TIMEOUT,
) -> None:
    """Run the server diagnostics and print a report to stdout.

    Steps, in order:
      1. Health check against ``<base_url>/health``; if the server cannot be
         reached the function returns immediately.
      2. Login and per-status document listing.
      3. Scan of the last lines of the server log for OCR/PDF/whitespace hits.
      4. Static root-cause notes and suggested solutions.

    Args:
        base_url: Root URL of the server (a trailing slash is ignored).
        log_path: Path to the server log file.
        username: Login user name.
        password: Login password.
        timeout: Per-request timeout in seconds, so an unresponsive server
            cannot hang the script.

    All failures are reported via ``print``; nothing is raised to the caller.
    """
    print("🔍 Analyzing server-side processing issue...")

    base_url = base_url.rstrip("/")

    if not _check_server_health(base_url, timeout):
        return

    _report_document_status(base_url, username, password, timeout)
    _show_recent_logs(log_path)
    _print_findings()


if __name__ == "__main__":
    analyze_server_issue()