# Files
# railseek6/fix_webui_bee_detection.py
#
# 355 lines
# 12 KiB
# Python

"""
Fix Web UI Bee Detection Issue
Ensures the server uses enhanced document processor and processes test.docx correctly
"""
import os
import sys
import time
import requests
import subprocess
# ---------------------------------------------------------------------------
# Connection settings shared by every HTTP helper in this script
# ---------------------------------------------------------------------------

# Base URL of the locally running LightRAG server.
LIGHTRAG_URL = "http://localhost:3015"

# Key sent with authenticated requests.
# NOTE(review): hard-coded credential - consider loading it from the
# environment if this script ever leaves a local test setup.
API_KEY = "jleu1212"

# Default headers for authenticated requests.
HEADERS = {
    "X-API-Key": API_KEY,
}
def stop_server(port=3015):
    """Stop the process that is listening on ``port`` (Windows only).

    Runs ``netstat -ano`` to find a LISTENING socket bound to ``port`` and
    force-kills the owning process with ``taskkill /F``.

    Args:
        port: TCP port the server listens on. Defaults to 3015.

    Returns:
        True if a listening process was found and ``taskkill`` reported
        success; False if none was found, the kill failed, or any error
        occurred while running the commands.
    """
    print("🛑 STOPPING CURRENT SERVER...")
    wanted_port = str(port)
    try:
        netstat = subprocess.run(["netstat", "-ano"], capture_output=True, text=True)
        for line in netstat.stdout.splitlines():
            fields = line.split()
            if len(fields) < 5 or "LISTENING" not in fields:
                continue
            # Compare the port of the local-address column exactly. A plain
            # substring test on the whole line would also match e.g. :30150
            # or a foreign address.
            # NOTE(review): assumes column 2 is the local address, as in
            # Windows `netstat -ano` output - confirm on the target system.
            _, sep, local_port = fields[1].rpartition(":")
            if not sep or local_port != wanted_port:
                continue
            pid = fields[-1]
            print(f"Found server process with PID: {pid}")
            kill = subprocess.run(
                ["taskkill", "/F", "/PID", pid], capture_output=True, text=True
            )
            if kill.returncode != 0:
                # Do not claim success when taskkill refused or failed.
                detail = (kill.stderr or kill.stdout or "").strip()
                print(f"❌ Failed to stop server process {pid}: {detail}")
                return False
            print("✅ Server stopped")
            time.sleep(3)  # Give the OS a moment to release the port
            return True
        print(f"❌ No server found on port {port}")
        return False
    except Exception as e:
        print(f"❌ Error stopping server: {e}")
        return False
def start_server_with_enhanced_processor():
    """Launch the LightRAG API server as a child process and wait for it to respond.

    The server is started with ``python -m lightrag.api.lightrag_server`` from
    the ``LightRAG-main`` directory on port 3015, with UTF-8 I/O forced through
    the environment. Its combined stdout/stderr is written to a log file in
    the system temp directory instead of pipes: nothing reads those pipes once
    this function returns, so a long-running server could block on a full
    pipe buffer or write into a closed pipe after this script exits.

    Returns:
        The ``subprocess.Popen`` handle once ``GET http://localhost:3015/``
        answers with HTTP 200; ``None`` if the process exits early, does not
        answer within 10 polls (3 s apart), or cannot be launched at all.
    """
    import tempfile  # function-scope import: only needed for the log file

    print("🚀 STARTING SERVER WITH ENHANCED PROCESSOR...")
    # Set environment to ensure our processor is used and fix encoding
    env = os.environ.copy()
    env.update({
        # NOTE(review): a relative PYTHONPATH entry is resolved against the
        # child's working directory, which is also "LightRAG-main" - confirm
        # this is the intended location.
        "PYTHONPATH": "LightRAG-main",  # Ensure our modified processor is used
        "CUSTOM_DOCUMENT_PROCESSOR": "true",
        "PYTHONIOENCODING": "utf-8",  # Fix Unicode encoding issue
        "PYTHONUTF8": "1",  # Enable UTF-8 mode
    })
    # The port and key below must stay in sync with the URL and key the rest
    # of this script uses to talk to the server.
    command = [
        sys.executable, "-m", "lightrag.api.lightrag_server",
        "--port", "3015",
        "--working-dir", "rag_storage",
        "--input-dir", "inputs",
        "--key", "jleu1212",
        "--auto-scan-at-startup",
        "--llm-binding", "openai",
        "--embedding-binding", "ollama",
        "--rerank-binding", "jina",
        "--summary-max-tokens", "1200",
    ]
    try:
        log_fd, log_path = tempfile.mkstemp(prefix="lightrag_server_", suffix=".log")
        # The child keeps its own inherited handle, so the parent's copy can
        # be closed as soon as the process has been spawned.
        with os.fdopen(log_fd, "w", encoding="utf-8") as log_file:
            process = subprocess.Popen(
                command,
                env=env,
                cwd="LightRAG-main",
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
        print(f"📝 Server output is logged to: {log_path}")
        print("⏳ Waiting for server to start...")
        # Wait and check for successful startup
        for _ in range(10):
            time.sleep(3)
            # Check if process is still running
            if process.poll() is not None:
                print("❌ Server process exited:")
                with open(log_path, encoding="utf-8", errors="replace") as log_file:
                    output = log_file.read()
                if output:
                    print(f"SERVER LOG: {output[-500:]}")  # Last 500 chars
                return None
            # Check if server is responding
            try:
                response = requests.get("http://localhost:3015/", timeout=2)
            except requests.RequestException:
                continue  # Server not ready yet
            if response.status_code == 200:
                print("✅ Server started successfully and responding")
                return process
        print("❌ Server not responding after 30 seconds")
        # The caller only receives None, so nobody else could stop the child
        # later: shut it down here instead of leaking it.
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
        return None
    except Exception as e:
        print(f"❌ Error starting server: {e}")
        return None
def wait_for_server():
    """Poll the server root URL until it answers with HTTP 200.

    Makes up to 10 requests to ``LIGHTRAG_URL`` (5 s timeout each), sleeping
    3 s after every unsuccessful attempt.

    Returns:
        True as soon as a request returns status 200, otherwise False once
        all attempts are used up.
    """
    print("⏳ Waiting for server to be ready...")
    for _ in range(10):
        try:
            response = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
        except requests.RequestException:
            # Connection refused / timed out: the server is not up yet.
            # Only request errors are swallowed so that Ctrl+C still aborts.
            pass
        else:
            if response.status_code == 200:
                print("✅ Server is ready")
                return True
        time.sleep(3)
    print("❌ Server not ready after 30 seconds")
    return False
def clear_existing_documents():
    """Delete every document the server lists so the upload is processed fresh.

    Fetches ``GET {LIGHTRAG_URL}/documents`` and, when the payload is a list,
    issues ``DELETE {LIGHTRAG_URL}/documents/<id>`` for each entry that has an
    ``id`` key. Errors are reported on stdout and never raised.

    Returns:
        None. The outcome is only printed.
    """
    print("🗑️ CLEARING EXISTING DOCUMENTS...")
    try:
        # Get current documents
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print("❌ Could not get documents list")
            return
        documents = response.json()
        if not isinstance(documents, list):
            # Nothing was deleted, so do not report the store as cleared.
            print(
                f"⚠️ Unexpected documents payload ({type(documents).__name__}); "
                "nothing was deleted"
            )
            return
        not_deleted = 0
        for doc in documents:
            if not isinstance(doc, dict) or 'id' not in doc:
                not_deleted += 1  # No id to address this entry with
                continue
            doc_id = doc['id']
            delete_response = requests.delete(
                f"{LIGHTRAG_URL}/documents/{doc_id}",
                headers=HEADERS,
                timeout=10,
            )
            if delete_response.status_code == 200:
                print(f"✅ Deleted document: {doc.get('filename', 'Unknown')}")
            else:
                not_deleted += 1
                print(f"❌ Failed to delete document: {delete_response.status_code}")
        if not_deleted:
            print(f"⚠️ {not_deleted} document(s) could not be deleted")
        else:
            print("✅ All documents cleared")
    except Exception as e:
        print(f"❌ Error clearing documents: {e}")
def upload_test_document(test_file="test.docx"):
    """Upload a .docx file to the server's ``/documents/upload`` endpoint.

    Args:
        test_file: Path of the document to upload. Defaults to ``test.docx``
            in the current working directory.

    Returns:
        True if the server answered with HTTP 200; False if the file does not
        exist, the request failed, or another status code was returned.
    """
    print("📤 UPLOADING TEST DOCUMENT WITH ENHANCED PROCESSOR...")
    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False
    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    try:
        with open(test_file, 'rb') as f:
            # Send only the base name so a path argument does not leak into
            # the filename the server stores.
            files = {'file': (os.path.basename(test_file), f, docx_mime)}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=HEADERS,
                timeout=60,  # Longer timeout for processing
            )
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False
    if response.status_code != 200:
        print(f"❌ Upload failed: {response.status_code} - {response.text}")
        return False
    print("✅ Document uploaded successfully")
    try:
        result = response.json()
    except ValueError:
        # A 200 response with a non-JSON body is still a successful upload;
        # fall back to the raw text instead of reporting a failure.
        result = response.text
    print(f" Upload result: {result}")
    return True
def wait_for_processing():
    """Poll the document list until test.docx is reported as processed.

    Up to 20 polls of ``GET {LIGHTRAG_URL}/documents`` are made, with a
    6-second pause after each unsuccessful one. Every listed entry whose
    filename contains ``test.docx`` has its status printed.

    Returns:
        True once such an entry has status ``processed``; False after the
        last poll.
    """
    print("⏳ WAITING FOR DOCUMENT PROCESSING...")
    polls_left = 20  # Wait up to 2 minutes
    while polls_left > 0:
        polls_left -= 1
        try:
            reply = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
            listing = reply.json() if reply.status_code == 200 else None
            if isinstance(listing, list):
                for entry in listing:
                    if 'test.docx' not in entry.get('filename', '').lower():
                        continue
                    state = entry.get('status', 'unknown')
                    print(f"📄 Document status: {state}")
                    if state == 'processed':
                        print("✅ Document processing completed")
                        return True
        except Exception as err:
            print(f"⚠️ Status check error: {err}")
        # Pause before the next poll, whether or not this one raised.
        time.sleep(6)
    print("❌ Timeout waiting for processing")
    return False
def test_bee_search():
    """Run a fixed set of search queries and report bee-related hits.

    Each query is POSTed to ``{LIGHTRAG_URL}/search`` in hybrid mode with
    ``top_k`` 10. A hit counts as a match when its lower-cased ``content``
    contains ``bee`` or ``classification``. Per-query errors are printed and
    do not stop the remaining queries.

    Returns:
        True if any hit of any query matched, otherwise False.
    """
    print("🔍 TESTING BEE SEARCH...")
    candidate_queries = (
        "bee",
        "Bee",
        "classification",
        "photo of a bee",
        "Entity: Bee",
        "insect",
        "animal",
    )
    bee_found = False
    for term in candidate_queries:
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={
                    "query": term,
                    "top_k": 10,
                    "mode": "hybrid",  # Use hybrid mode which worked in diagnostics
                },
                headers=HEADERS,
                timeout=30,
            )
            if reply.status_code != 200:
                print(f"'{term}' search failed: {reply.status_code}")
                continue
            hits = reply.json().get('results')
            if not hits:
                print(f"'{term}': No results")
                continue
            print(f"'{term}': Found {len(hits)} results")
            # Check if any result contains bee-related content
            for hit in hits:
                text = hit.get('content', '').lower()
                relevance = hit.get('score', 0)
                if 'bee' not in text and 'classification' not in text:
                    continue
                print(f"🎯 BEE FOUND: Score {relevance:.4f}")
                print(f" Content: {text[:200]}...")
                bee_found = True
        except Exception as err:
            print(f"'{term}' search error: {err}")
    return bee_found
def verify_webui_access():
    """Check that ``{LIGHTRAG_URL}/webui`` answers with HTTP 200.

    The request is sent without the API-key headers and with a 10-second
    timeout.

    Returns:
        True when the status code is 200; False for any other status or when
        the request raises.
    """
    print("🌐 VERIFYING WEB UI ACCESS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/webui", timeout=10)
        reachable = reply.status_code == 200
        if reachable:
            print("✅ Web UI is accessible")
        else:
            print(f"❌ Web UI not accessible: {reply.status_code}")
        return reachable
    except Exception as err:
        print(f"❌ Web UI test error: {err}")
        return False
def main():
    """Run the whole fix workflow and print a summary.

    Steps, in order: stop the running server, start a new one, wait for it,
    clear stored documents, upload the test document, wait for processing,
    run the bee searches, and check the Web UI. A failed server start or
    upload aborts the run; the other failures are reported and the run
    continues.

    Returns:
        True if the bee search found a match; False otherwise, including when
        the run was aborted early.
    """
    divider = "=" * 60
    print("🔧 FIXING WEB UI BEE DETECTION ISSUE")
    print(divider)

    # Step 1: Stop current server (failure is not fatal)
    stopped = stop_server()
    if not stopped:
        print("⚠️ Could not stop server, but continuing...")

    # Step 2: Start server with enhanced processor (fatal on failure)
    server_process = start_server_with_enhanced_processor()
    if not server_process:
        print("❌ Cannot proceed - server not started")
        return False

    # Step 3: Wait for server to be ready (failure is not fatal)
    ready = wait_for_server()
    if not ready:
        print("❌ Server not ready, but continuing...")

    # Step 4: Clear existing documents
    clear_existing_documents()

    # Step 5: Upload test document (fatal on failure)
    uploaded = upload_test_document()
    if not uploaded:
        print("❌ Document upload failed")
        return False

    # Step 6: Wait for processing (failure is not fatal)
    processed = wait_for_processing()
    if not processed:
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 7: Test bee search
    bee_found = test_bee_search()

    # Step 8: Verify Web UI access
    webui_accessible = verify_webui_access()

    print("\n" + divider)
    print("📊 FIX RESULTS")
    print(divider)

    if bee_found:
        outcome_lines = (
            "🎉 SUCCESS: Bee classification is now searchable!",
            " The enhanced document processor is working correctly.",
            " The Web UI should now detect bee classification.",
        )
    else:
        outcome_lines = (
            "❌ ISSUE: Bee classification still not searchable",
            " There may be an issue with the enhanced processor",
            " or the image classification is not running.",
        )
    for text in outcome_lines:
        print(text)

    webui_answer = 'Yes' if webui_accessible else 'No'
    print(f"✅ Web UI Accessible: {webui_answer}")

    for text in (
        "\n💡 Next steps:",
        " 1. Open the Web UI at http://localhost:3015/webui",
        " 2. Search for 'bee' to verify classification appears",
        " 3. Check server logs for any processing errors",
    ):
        print(text)

    if not bee_found:
        print("\n❌ FIX INCOMPLETE: Further investigation needed")
        return False
    print("\n✅ FIX COMPLETED: Web UI should now detect bee classification")
    return True
# Script entry point: run the fix, print the final verdict, and report the
# outcome to the calling shell through the process exit status.
if __name__ == "__main__":
    success = main()
    if success:
        print("\n🎉 The fix has been applied successfully!")
        print(" Please test the Web UI to confirm bee detection is working.")
    else:
        print("\n⚠️ The fix encountered issues.")
        print(" Please check the server logs for more details.")
    # Exit non-zero on failure so wrappers and CI can detect it; previously
    # the script always exited with status 0.
    sys.exit(0 if success else 1)