Files
railseek6/test_webui_bee_search.py

221 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""
Test script to verify Web UI search functionality for bee classification.

This tests the complete pipeline, including LightRAG's document processing.
"""
import requests
import json
import time
import os
from pathlib import Path
# Configuration
# Every value can be overridden through the environment. The defaults are the
# values this script has always used, so running it unchanged behaves the same.
# A trailing slash is stripped because the URL is joined as f"{LIGHTRAG_URL}/...".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015").rstrip("/")
TEST_DOCX_PATH = os.environ.get("TEST_DOCX_PATH", "test.docx")
# NOTE(review): the fallback below is a credential committed to source control
# (it is the key from zrun.bat). Prefer supplying LIGHTRAG_API_KEY via the
# environment and rotating this key if the repository is shared.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
def test_webui_search():
    """Test complete Web UI search pipeline for bee classification.

    Runs these steps in order against the server at ``LIGHTRAG_URL``:

    1. ``GET /health`` to confirm the server answers with HTTP 200.
    2. Check that ``TEST_DOCX_PATH`` exists on disk.
    3. ``POST /documents/upload`` with the test document.
    4. Poll ``GET /documents/pipeline_status`` until ``busy`` is false or
       two minutes of polling have elapsed (a timeout is reported but the
       search is attempted anyway).
    5. ``POST /search`` for ``"bee"`` and look for "bee" or "classification"
       in each result's ``content``.
    6. If nothing matched, run a few alternative queries and print what
       they return, purely as a diagnostic.

    Every HTTP call carries a timeout so a stalled server cannot hang the
    script indefinitely.

    Returns:
        bool: True if at least one result of the "bee" search mentions
        "bee" or "classification"; False on any failure or if no result
        matches.
    """
    # Timeouts (seconds) for the calls that previously had none.
    upload_timeout = 60
    status_timeout = 10

    print("🧪 Testing Web UI Bee Search Pipeline")
    print("=" * 50)

    # Check if LightRAG server is running
    try:
        response = requests.get(f"{LIGHTRAG_URL}/health", timeout=10)
        if response.status_code != 200:
            print("❌ LightRAG server is not responding")
            return False
        print("✅ LightRAG server is running")
    except Exception as e:
        print(f"❌ Cannot connect to LightRAG server: {e}")
        return False

    # Check if test document exists
    if not os.path.exists(TEST_DOCX_PATH):
        print(f"❌ Test document not found: {TEST_DOCX_PATH}")
        return False
    print(f"✅ Test document found: {TEST_DOCX_PATH}")

    # Upload test document
    print("\n📤 Uploading test document...")
    try:
        with open(TEST_DOCX_PATH, 'rb') as f:
            files = {'file': (TEST_DOCX_PATH, f, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')}
            headers = {'X-API-Key': API_KEY}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=headers,
                timeout=upload_timeout,
            )
        if response.status_code == 200:
            upload_result = response.json()
            track_id = upload_result.get('track_id')
            print(f"✅ Document uploaded successfully, track_id: {track_id}")
        else:
            print(f"❌ Upload failed: {response.status_code} - {response.text}")
            return False
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False

    # Wait for indexing to complete
    # NOTE(review): a non-busy pipeline is taken to mean "indexing finished".
    # If the server has not started processing yet when first polled, this
    # may report completion early - confirm against the server's behavior.
    print("\n⏳ Waiting for indexing to complete...")
    max_wait_time = 120  # 2 minutes
    wait_interval = 5
    waited = 0
    while waited < max_wait_time:
        try:
            # Check pipeline status
            headers = {'X-API-Key': API_KEY}
            status_response = requests.get(
                f"{LIGHTRAG_URL}/documents/pipeline_status",
                headers=headers,
                timeout=status_timeout,
            )
            if status_response.status_code == 200:
                status_data = status_response.json()
                busy = status_data.get('busy', False)
                if not busy:
                    print("✅ Indexing completed")
                    break
                job_name = status_data.get('job_name', 'Unknown')
                print(f"⏳ Still indexing... ({job_name})")
            else:
                print(f"⚠️ Could not get pipeline status: {status_response.status_code}")
        except Exception as e:
            # A failed poll is not fatal; keep trying until the budget runs out.
            print(f"⚠️ Error checking pipeline status: {e}")
        time.sleep(wait_interval)
        waited += wait_interval

    # Only reached with waited >= max_wait_time when the loop never hit `break`.
    if waited >= max_wait_time:
        print("❌ Indexing timeout - proceeding with search anyway")

    # Test search for "bee"
    print("\n🔍 Testing search for 'bee'...")
    try:
        search_payload = {
            "query": "bee",
            "top_k": 10,
            "mode": "local"
        }
        headers = {
            'Content-Type': 'application/json',
            'X-API-Key': API_KEY
        }
        search_response = requests.post(
            f"{LIGHTRAG_URL}/search",
            json=search_payload,
            headers=headers,
            timeout=30
        )
        if search_response.status_code != 200:
            print(f"❌ Search failed: {search_response.status_code} - {search_response.text}")
            return False

        search_results = search_response.json()
        results = search_results.get('results', [])
        print(f"✅ Search completed, found {len(results)} results")

        # Analyze search results
        bee_found = False
        for i, result in enumerate(results):
            content = result.get('content', '')
            score = result.get('score', 0)
            source = result.get('source', 'Unknown')
            print(f"\nResult {i+1} (Score: {score:.4f}, Source: {source}):")
            print(f"Content preview: {content[:200]}...")
            # Check if bee classification is in the content
            lowered = content.lower()
            if 'bee' in lowered or 'classification' in lowered:
                bee_found = True
                print("🎯 BEE CLASSIFICATION DETECTED IN SEARCH RESULT!")

        if bee_found:
            print("\n✅ SUCCESS: Bee classification is searchable in Web UI!")
            return True

        print("\n❌ Bee classification not found in search results")
        print("This might indicate:")
        print("- Classification metadata not properly indexed")
        print("- Search query needs adjustment")
        print("- Indexing may not have completed")

        # Try alternative search queries (diagnostic output only; the
        # overall result is still a failure).
        print("\n🔍 Trying alternative search queries...")
        alternative_queries = ["classification", "image", "photo", "clipart"]
        for alt_query in alternative_queries:
            alt_payload = {
                "query": alt_query,
                "top_k": 5,
                "mode": "local"
            }
            alt_response = requests.post(
                f"{LIGHTRAG_URL}/search",
                json=alt_payload,
                headers=headers,
                timeout=10
            )
            if alt_response.status_code == 200:
                alt_results = alt_response.json().get('results', [])
                if alt_results:
                    print(f"Query '{alt_query}': Found {len(alt_results)} results")
                    for result in alt_results[:2]:  # Show first 2 results
                        content_preview = result.get('content', '')[:150]
                        print(f" - {content_preview}...")
                else:
                    print(f"Query '{alt_query}': No results")
        return False
    except Exception as e:
        print(f"❌ Search error: {e}")
        return False
def check_document_status():
    """Check the status of uploaded documents.

    Sends ``GET /documents`` to the server at ``LIGHTRAG_URL`` and, for each
    entry in the response's ``statuses`` mapping, prints the number of
    documents and the ``file_path`` / ``id`` of the first three.

    The request carries a timeout so an unresponsive server cannot hang the
    script. Any error is printed rather than raised; nothing is returned.
    """
    print("\n📊 Checking document status...")
    try:
        headers = {'X-API-Key': API_KEY}
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=headers, timeout=10)
        if response.status_code != 200:
            print(f"❌ Could not get document status: {response.status_code}")
            return

        status_data = response.json()
        statuses = status_data.get('statuses', {})
        for status, docs in statuses.items():
            print(f"{status}: {len(docs)} documents")
            for doc in docs[:3]:  # Show first 3 documents of each status
                print(f" - {doc.get('file_path', 'Unknown')} (ID: {doc.get('id', 'Unknown')})")
    except Exception as e:
        print(f"❌ Error checking document status: {e}")
if __name__ == "__main__":
    # Script entry point: run the search test, print a document status
    # summary, then report the outcome both as text and as the exit status.
    print("Web UI Bee Search Test")
    print("This test verifies that bee classification is searchable through the Web UI")
    print("Make sure LightRAG server is running on port 3015")
    print()

    success = test_webui_search()
    # Printed regardless of the outcome, as a debugging aid.
    check_document_status()

    if success:
        print("\n🎉 TEST PASSED: Bee classification is successfully searchable in Web UI!")
    else:
        print("\n💥 TEST FAILED: Bee classification is not searchable in Web UI")
        print("\nTroubleshooting steps:")
        print("1. Check that LightRAG server is running on port 3015")
        print("2. Verify the document processor is using our custom implementation")
        print("3. Check if the test.docx file contains the bee image")
        print("4. Verify that classification metadata is being added to the content")
        print("5. Check LightRAG logs for any processing errors")

    # Exit non-zero on failure so batch files and CI can detect it; the
    # script previously always exited with status 0.
    raise SystemExit(0 if success else 1)