355 lines
12 KiB
Python
355 lines
12 KiB
Python
"""
|
|
Fix Web UI Bee Detection Issue
|
|
Ensures the server uses enhanced document processor and processes test.docx correctly
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import requests
|
|
import subprocess
|
|
|
|
# ---------------------------------------------------------------------------
# Connection settings shared by the HTTP helpers in this script
# ---------------------------------------------------------------------------

# Base URL of the local LightRAG server.
LIGHTRAG_URL = "http://localhost:3015"

# API key sent with requests in the X-API-Key header.
API_KEY = "jleu1212"

# Default headers attached to authenticated requests.
HEADERS = {
    "X-API-Key": API_KEY,
}
|
|
|
|
def stop_server():
    """Stop the LightRAG server that is listening on port 3015.

    Runs ``netstat -ano`` and scans its output for a ``LISTENING`` socket
    whose local address ends in ``:3015``, then force-kills the owning
    process with ``taskkill /F /PID``. Both tools and the column layout
    assumed here are the Windows ones.

    Returns:
        bool: True if a listener was found and ``taskkill`` exited with
        status 0. False if no listener was found, every kill attempt failed,
        or an unexpected error occurred (the error is printed, not raised).
    """
    print("🛑 STOPPING CURRENT SERVER...")

    port_suffix = ":3015"
    attempted_pids = set()

    try:
        # Find the process using port 3015
        result = subprocess.run(["netstat", "-ano"], capture_output=True, text=True)

        for line in result.stdout.splitlines():
            parts = line.split()
            # Expected columns: proto, local address, foreign address, state, PID
            if len(parts) < 5 or "LISTENING" not in parts:
                continue
            # Match on the local-address column only. A plain substring test
            # on the whole line would also hit ports such as 30150 or a
            # foreign address that happens to use :3015.
            if not parts[1].endswith(port_suffix):
                continue

            pid = parts[-1]
            if pid in attempted_pids:
                # The same PID can appear on several lines (e.g. one per
                # address family); do not try to kill it twice.
                continue
            attempted_pids.add(pid)

            print(f"Found server process with PID: {pid}")
            kill = subprocess.run(
                ["taskkill", "/F", "/PID", pid],
                capture_output=True,
                text=True,
            )
            if kill.returncode == 0:
                print("✅ Server stopped")
                # Brief pause after the kill, presumably so the port is
                # released before a new server is started.
                time.sleep(3)
                return True

            detail = (kill.stderr or kill.stdout or "").strip()
            print(f"❌ taskkill failed for PID {pid}: {detail}")

        if not attempted_pids:
            print("❌ No server found on port 3015")
        return False

    except Exception as e:
        print(f"❌ Error stopping server: {e}")
        return False
|
|
|
|
def start_server_with_enhanced_processor():
    """Launch the LightRAG API server and wait until it answers over HTTP.

    The server is started as ``python -m lightrag.api.lightrag_server`` from
    the ``LightRAG-main`` directory with UTF-8 related environment variables
    and ``CUSTOM_DOCUMENT_PROCESSOR=true`` set. Its combined stdout/stderr is
    written to a log file in the system temp directory instead of a pipe:
    nothing in this script drains a pipe while the server runs, so a pipe
    would eventually fill up and block the server.

    After launching, the function polls ``http://localhost:3015/`` up to 10
    times, sleeping 3 seconds before each attempt.

    Returns:
        subprocess.Popen | None: The running server process once the root URL
        returns HTTP 200. None if the process exits early, never answers with
        200, or an error occurs while starting it. When the server simply
        does not answer in time, the child process is left running.
    """
    import tempfile  # local import: only needed for the log file location

    print("🚀 STARTING SERVER WITH ENHANCED PROCESSOR...")

    port = "3015"
    # Absolute path: a relative PYTHONPATH entry would be resolved against the
    # child's working directory (which is already LightRAG-main) and would
    # therefore point at a non-existent nested directory.
    repo_dir = os.path.abspath("LightRAG-main")
    log_path = os.path.join(tempfile.gettempdir(), f"lightrag_server_{port}.log")

    # Set environment to ensure our processor is used and fix encoding
    env = os.environ.copy()
    env.update({
        "PYTHONPATH": repo_dir,  # Ensure our modified processor is used
        "CUSTOM_DOCUMENT_PROCESSOR": "true",
        "PYTHONIOENCODING": "utf-8",  # Fix Unicode encoding issue
        "PYTHONUTF8": "1",  # Enable UTF-8 mode
    })

    command = [
        sys.executable, "-m", "lightrag.api.lightrag_server",
        "--port", port,
        "--working-dir", "rag_storage",
        "--input-dir", "inputs",
        "--key", API_KEY,  # same key the HTTP helpers send in HEADERS
        "--auto-scan-at-startup",
        "--llm-binding", "openai",
        "--embedding-binding", "ollama",
        "--rerank-binding", "jina",
        "--summary-max-tokens", "1200",
    ]

    try:
        # Run the server module directly with the current interpreter. The
        # child keeps its own handle on the log file, so ours can be closed
        # as soon as the process has been spawned.
        with open(log_path, "wb") as log_file:
            process = subprocess.Popen(
                command,
                env=env,
                cwd=repo_dir,
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
        print(f"📝 Server output is being written to: {log_path}")

        print("⏳ Waiting for server to start...")

        # Wait and check for successful startup
        for _ in range(10):
            time.sleep(3)

            # Check if process is still running
            if process.poll() is not None:
                print("❌ Server process exited:")
                tail = _read_log_tail(log_path)
                if tail:
                    print(f"OUTPUT (last 500 chars): {tail}")
                return None

            # Check if server is responding
            try:
                response = requests.get(f"http://localhost:{port}/", timeout=2)
            except requests.RequestException:
                # Server not ready yet. Only request errors are ignored so
                # that Ctrl+C still interrupts the wait.
                continue

            if response.status_code == 200:
                print("✅ Server started successfully and responding")
                return process

        print("❌ Server not responding after 30 seconds")
        return None

    except Exception as e:
        print(f"❌ Error starting server: {e}")
        return None


def _read_log_tail(path, max_chars=500):
    """Return the last ``max_chars`` characters of the text file at ``path``, or '' if it cannot be read."""
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as fh:
            return fh.read()[-max_chars:]
    except OSError:
        return ""
|
|
|
|
def wait_for_server():
    """Poll the server root URL until it answers with HTTP 200.

    Makes up to 10 GET requests to ``LIGHTRAG_URL`` (sending ``HEADERS``,
    5 second timeout each) and sleeps 3 seconds after every unsuccessful
    attempt.

    Returns:
        bool: True as soon as a 200 response is received, False if none of
        the attempts succeeded.
    """
    print("⏳ Waiting for server to be ready...")

    for _ in range(10):
        try:
            response = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
        except requests.RequestException:
            # Connection refused / timed out: the server is not up yet.
            # Only request errors are swallowed, so KeyboardInterrupt and
            # programming errors still surface.
            response = None

        if response is not None and response.status_code == 200:
            print("✅ Server is ready")
            return True

        time.sleep(3)

    print("❌ Server not ready after 30 seconds")
    return False
|
|
|
|
def clear_existing_documents():
    """Delete every document the server lists so the upload is processed fresh.

    Fetches ``{LIGHTRAG_URL}/documents`` and, when the response body is a JSON
    list, issues ``DELETE {LIGHTRAG_URL}/documents/<id>`` for each entry that
    is a dict with an ``'id'`` key. Progress and a final summary are printed.
    The success message is only printed when nothing failed or was skipped.

    Returns:
        None. Errors are reported on stdout and never raised.
    """
    print("🗑️ CLEARING EXISTING DOCUMENTS...")

    try:
        # Get current documents
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print("❌ Could not get documents list")
            return

        documents = response.json()
        if not isinstance(documents, list):
            # Nothing can be iterated, so do not claim that anything was
            # cleared. NOTE(review): confirm the payload shape the server
            # actually returns for this endpoint.
            print(
                f"⚠️ Unexpected documents payload ({type(documents).__name__}); "
                "nothing was deleted"
            )
            return

        failed = 0
        skipped = 0
        for doc in documents:
            if not isinstance(doc, dict) or 'id' not in doc:
                skipped += 1
                continue

            doc_id = doc['id']
            delete_response = requests.delete(
                f"{LIGHTRAG_URL}/documents/{doc_id}",
                headers=HEADERS,
                timeout=10,
            )
            if delete_response.status_code == 200:
                print(f"✅ Deleted document: {doc.get('filename', 'Unknown')}")
            else:
                failed += 1
                print(f"❌ Failed to delete document: {delete_response.status_code}")

        if failed or skipped:
            print(f"⚠️ Documents not cleared: {failed} failed, {skipped} skipped (no id)")
        else:
            print("✅ All documents cleared")

    except Exception as e:
        print(f"❌ Error clearing documents: {e}")
|
|
|
|
def upload_test_document(test_file="test.docx"):
    """Upload a .docx file to the server's ``/documents/upload`` endpoint.

    Args:
        test_file: Path of the document to upload. Defaults to ``test.docx``
            in the current working directory, which is what the script used
            before this parameter existed.

    Returns:
        bool: True if the server answered the upload with HTTP 200. False if
        the file does not exist, the request raised, or any other status code
        was returned. Failures are printed, not raised.
    """
    print("📤 UPLOADING TEST DOCUMENT WITH ENHANCED PROCESSOR...")

    if not os.path.isfile(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

    try:
        with open(test_file, 'rb') as f:
            # Send only the file name, not a directory path, as the upload name.
            files = {'file': (os.path.basename(test_file), f, docx_mime)}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=HEADERS,
                timeout=60,  # Longer timeout for processing
            )
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Upload failed: {response.status_code} - {response.text}")
        return False

    print("✅ Document uploaded successfully")
    try:
        result = response.json()
    except ValueError:
        # A 200 with a non-JSON body is still a successful upload; show the
        # raw text instead of reporting a failure.
        result = response.text
    print(f"   Upload result: {result}")
    return True
|
|
|
|
def wait_for_processing():
    """Poll the document list until test.docx is reported as processed.

    Performs up to 20 status checks against ``{LIGHTRAG_URL}/documents`` with
    a 6 second pause after each one. A check that raises is reported on
    stdout and counts as one attempt.

    Returns:
        bool: True once an entry whose filename contains ``test.docx`` has
        status ``'processed'``; False if that never happens within the
        allotted attempts.
    """
    print("⏳ WAITING FOR DOCUMENT PROCESSING...")

    def _test_doc_is_processed():
        """Run one status check; print each matching entry's status."""
        reply = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if reply.status_code != 200:
            return False

        payload = reply.json()
        if not isinstance(payload, list):
            return False

        for entry in payload:
            if 'test.docx' not in entry.get('filename', '').lower():
                continue
            state = entry.get('status', 'unknown')
            print(f"📄 Document status: {state}")
            if state == 'processed':
                return True
        return False

    for _ in range(20):  # Wait up to 2 minutes
        try:
            if _test_doc_is_processed():
                print("✅ Document processing completed")
                return True
        except Exception as e:
            print(f"⚠️ Status check error: {e}")
        # Same pause whether the check completed or raised.
        time.sleep(6)

    print("❌ Timeout waiting for processing")
    return False
|
|
|
|
def test_bee_search():
    """Run a set of search queries and report whether bee content comes back.

    Each query is POSTed to ``{LIGHTRAG_URL}/search`` in ``hybrid`` mode with
    ``top_k`` 10. A result counts as a hit when its lower-cased ``content``
    contains the word "bee"/"bees" or the substring "classification".
    The word match requires that "bee(s)" is not followed by another letter,
    so unrelated words such as "been" or "beer" no longer trigger a hit.

    Returns:
        bool: True if at least one result of any query was a hit, otherwise
        False. Request and parsing errors are printed per query and do not
        stop the remaining queries.
    """
    import re  # local import: only needed for the word-level bee match

    print("🔍 TESTING BEE SEARCH...")

    search_queries = [
        "bee",
        "Bee",
        "classification",
        "photo of a bee",
        "Entity: Bee",
        "insect",
        "animal",
    ]

    # "bee" or "bees" not followed by a letter: matches "bee", "bee_1.jpg",
    # "honeybee", but not "been", "beer", "beef".
    bee_pattern = re.compile(r"bees?(?![a-z])")

    bee_found = False

    for query in search_queries:
        try:
            search_payload = {
                "query": query,
                "top_k": 10,
                "mode": "hybrid",  # Use hybrid mode which worked in diagnostics
            }

            response = requests.post(
                f"{LIGHTRAG_URL}/search",
                json=search_payload,
                headers=HEADERS,
                timeout=30,
            )

            if response.status_code != 200:
                print(f"❌ '{query}' search failed: {response.status_code}")
                continue

            results = response.json()
            hits = results.get('results')
            if not hits:
                print(f"❌ '{query}': No results")
                continue

            print(f"✅ '{query}': Found {len(hits)} results")

            # Check if any result contains bee-related content
            for result in hits:
                # Tolerate a missing/None content field instead of aborting
                # the whole query on the first malformed result.
                content = str(result.get('content') or '').lower()
                score = result.get('score', 0)

                if bee_pattern.search(content) or 'classification' in content:
                    if isinstance(score, (int, float)):
                        score_text = f"{score:.4f}"
                    else:
                        score_text = str(score)
                    print(f"🎯 BEE FOUND: Score {score_text}")
                    print(f"   Content: {content[:200]}...")
                    bee_found = True

        except Exception as e:
            print(f"❌ '{query}' search error: {e}")

    return bee_found
|
|
|
|
def verify_webui_access():
    """Check that ``{LIGHTRAG_URL}/webui`` answers with HTTP 200.

    Returns:
        bool: True on a 200 response; False on any other status code or when
        the request raises (the reason is printed in both cases).
    """
    print("🌐 VERIFYING WEB UI ACCESS...")

    try:
        webui_reply = requests.get(f"{LIGHTRAG_URL}/webui", timeout=10)
        if webui_reply.status_code != 200:
            print(f"❌ Web UI not accessible: {webui_reply.status_code}")
            return False

        print("✅ Web UI is accessible")
        return True
    except Exception as e:
        print(f"❌ Web UI test error: {e}")
        return False
|
|
|
|
def main():
    """Run the whole fix workflow and print a summary.

    Steps, in order: stop the running server, start a new one, wait for it,
    clear existing documents, upload the test document, wait for processing,
    run the bee search and check the Web UI. A failed server start or a
    failed upload aborts immediately; the other steps only print a warning
    and carry on.

    Returns:
        bool: True if the bee search found matching content, otherwise False
        (including the early aborts).
    """
    divider = "=" * 60

    print("🔧 FIXING WEB UI BEE DETECTION ISSUE")
    print(divider)

    # Step 1: stop whatever server is currently running
    stopped = stop_server()
    if not stopped:
        print("⚠️ Could not stop server, but continuing...")

    # Step 2: start a server that uses the enhanced processor
    server_process = start_server_with_enhanced_processor()
    if not server_process:
        print("❌ Cannot proceed - server not started")
        return False

    # Step 3: give the server time to become ready
    ready = wait_for_server()
    if not ready:
        print("❌ Server not ready, but continuing...")

    # Step 4: remove previously ingested documents
    clear_existing_documents()

    # Step 5: upload the test document
    uploaded = upload_test_document()
    if not uploaded:
        print("❌ Document upload failed")
        return False

    # Step 6: wait until the upload has been processed
    processed = wait_for_processing()
    if not processed:
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 7: search for bee-related content
    bee_found = test_bee_search()

    # Step 8: make sure the Web UI can be reached
    webui_accessible = verify_webui_access()

    print("\n" + divider)
    print("📊 FIX RESULTS")
    print(divider)

    if bee_found:
        verdict_lines = (
            "🎉 SUCCESS: Bee classification is now searchable!",
            "   The enhanced document processor is working correctly.",
            "   The Web UI should now detect bee classification.",
        )
    else:
        verdict_lines = (
            "❌ ISSUE: Bee classification still not searchable",
            "   There may be an issue with the enhanced processor",
            "   or the image classification is not running.",
        )
    for text in verdict_lines:
        print(text)

    accessible_label = 'Yes' if webui_accessible else 'No'
    print(f"✅ Web UI Accessible: {accessible_label}")

    next_steps = (
        "\n💡 Next steps:",
        "   1. Open the Web UI at http://localhost:3015/webui",
        "   2. Search for 'bee' to verify classification appears",
        "   3. Check server logs for any processing errors",
    )
    for text in next_steps:
        print(text)

    if not bee_found:
        print("\n❌ FIX INCOMPLETE: Further investigation needed")
        return False

    print("\n✅ FIX COMPLETED: Web UI should now detect bee classification")
    return True
|
|
|
|
if __name__ == "__main__":
    # Run the fix, print the closing message, and mirror the outcome in the
    # process exit status (0 = success, 1 = failure) so that calling scripts
    # can detect a failed run instead of always seeing exit code 0.
    success = main()
    if success:
        print("\n🎉 The fix has been applied successfully!")
        print("   Please test the Web UI to confirm bee detection is working.")
    else:
        print("\n⚠️ The fix encountered issues.")
        print("   Please check the server logs for more details.")
    sys.exit(0 if success else 1)