87 lines
3.5 KiB
Python
87 lines
3.5 KiB
Python
"""
Analyze the server-side processing issue by examining the actual server logs and behavior.
"""
|
|
import requests
|
|
import json
|
|
import time
|
|
|
|
def analyze_server_issue(
    *,
    base_url: str = "http://localhost:3015",
    username: str = "admin",
    password: str = "password",
    log_path: str = "lightrag.log",
    timeout: float = 10.0,
    tail_lines: int = 50,
) -> None:
    """Print a diagnostic report about the server-side processing issue.

    The report has four parts: a probe of the ``/health`` endpoint, a
    per-status listing of documents (after logging in), the OCR/PDF/whitespace
    related entries among the last lines of the server log, and a static
    summary of findings and suggested fixes.

    Args:
        base_url: Root URL of the server; a trailing slash is ignored.
        username: Account name sent to ``/auth/login``.
        password: Password sent to ``/auth/login``.
        log_path: Path of the server log file to scan.
        timeout: Seconds to wait on each HTTP request before giving up.
        tail_lines: Number of trailing log lines to scan for relevant entries.

    Returns:
        None. All results are written to stdout. If the health request itself
        fails, the function returns right after reporting that failure.
    """
    print("🔍 Analyzing server-side processing issue...")

    root_url = base_url.rstrip("/")

    # Only a failed request aborts the run; a non-200 health status is
    # reported and the remaining checks still execute.
    if not _check_server_health(root_url, timeout):
        return

    _report_document_status(root_url, username, password, timeout)
    _print_recent_logs(log_path, tail_lines)
    _print_root_cause_summary()


def _check_server_health(base_url: str, timeout: float) -> bool:
    """Probe ``/health``; return False only when the request itself fails."""
    print("\n📡 Checking server status...")
    try:
        # Without a timeout an unresponsive server would block here forever.
        response = requests.get(f"{base_url}/health", timeout=timeout)
    except requests.RequestException as e:
        print(f" ❌ Cannot connect to server: {e}")
        return False

    print(f" Server health: {response.status_code}")
    if response.status_code == 200:
        print(" ✅ Server is running")
    else:
        print(" ❌ Server health check failed")
    return True


def _report_document_status(
    base_url: str, username: str, password: str, timeout: float
) -> None:
    """Log in, fetch ``/documents`` and print the documents grouped by status."""
    try:
        print("\n📊 Checking current document status...")

        # Login first to get token
        login_response = requests.post(
            f"{base_url}/auth/login",
            data={"username": username, "password": password},
            timeout=timeout,
        )
        if login_response.status_code != 200:
            print(f" ❌ Login failed: {login_response.status_code}")
            return

        token = login_response.json().get("access_token")
        if token:
            headers = {"Authorization": f"Bearer {token}"}
        else:
            # Avoid sending a bogus "Bearer None" header; try unauthenticated.
            print(" ⚠️ Login response contained no access_token; continuing without it")
            headers = {}

        # Get document status
        docs_response = requests.get(
            f"{base_url}/documents", headers=headers, timeout=timeout
        )
        if docs_response.status_code != 200:
            print(f" ❌ Failed to get documents: {docs_response.status_code}")
            return

        docs_data = docs_response.json()
        print(" Current document status:")
        for status, docs in docs_data.get("statuses", {}).items():
            print(f" {status}: {len(docs)} documents")
            for doc in docs:
                # `or` also covers an error_msg key that is present but null.
                error_msg = doc.get('error_msg') or 'No error'
                print(f" - {doc.get('file_path')}: {error_msg}")

    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic, and the
        # response payload shape is not validated, so any failure is reported
        # instead of aborting the remaining checks.
        print(f" ❌ Error checking document status: {e}")


def _print_recent_logs(log_path: str, tail_lines: int) -> None:
    """Print OCR/PDF/whitespace related entries from the tail of the log file."""
    from collections import deque

    keywords = ("ocr", "pdf", "whitespace")

    print("\n📋 Checking for recent server logs...")
    try:
        # errors="replace" keeps a stray undecodable byte from aborting the scan.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            # A bounded deque keeps only the tail instead of the whole file.
            recent_logs = deque(f, maxlen=max(tail_lines, 0))
    except FileNotFoundError:
        print(f" ❌ {log_path} file not found")
        return
    except OSError as e:
        print(f" ❌ Error reading logs: {e}")
        return

    print(" Recent server logs:")
    for line in recent_logs:
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            print(f" {line.strip()}")


def _print_root_cause_summary() -> None:
    """Print the fixed list of findings and suggested remedies."""
    # NOTE: these lines are static text; they are not derived from the checks
    # performed earlier in the run.
    print("\n🔬 Root Cause Analysis:")
    print(" 1. ✅ Direct document processor works correctly (extracts 1516 characters)")
    print(" 2. ✅ OCR engine is properly initialized with GPU")
    print(" 3. ❌ Server upload fails with 'File content contains only whitespace characters'")
    print(" 4. ❌ Server is using cached/old document processor code")
    print(" 5. ❌ Server restart didn't load updated document_processor.py")

    print("\n💡 Possible Solutions:")
    print(" - Kill all Python processes and restart server")
    print(" - Check if server is using a different virtual environment")
    print(" - Verify the document_processor.py file loaded by server")
    print(" - Add debug logging to server's document processing pipeline")
|
|
|
|
# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    analyze_server_issue()