221 lines
8.5 KiB
Python
221 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script to verify Web UI search functionality for bee classification
|
|
This tests the complete pipeline including LightRAG's document processing
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
import time
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Configuration
# Every value can be overridden through an environment variable so the script
# can target another server, document, or key without editing the source.
# The defaults are the original hard-coded values.
# A trailing slash is stripped because endpoints are appended as "/path".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015").rstrip("/")
TEST_DOCX_PATH = os.environ.get("LIGHTRAG_TEST_DOCX", "test.docx")
# Default is the API key from zrun.bat; prefer supplying it via the
# environment rather than keeping a secret in source control.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
|
|
|
|
def test_webui_search():
    """Run the end-to-end Web UI search check for the bee classification.

    Steps, in order: health-check the server, confirm the test document
    exists locally, upload it, wait for the indexing pipeline to report
    that it is no longer busy, then search for "bee" and inspect the
    returned content. If nothing matches, a few alternative queries are
    run purely for diagnostics.

    Returns:
        bool: True when at least one search result's content contains
        "bee" or "classification" (case-insensitive); False on any
        failure along the way.
    """
    print("🧪 Testing Web UI Bee Search Pipeline")
    print("=" * 50)

    if not _server_is_healthy():
        return False

    # Check if test document exists
    if not os.path.exists(TEST_DOCX_PATH):
        print(f"❌ Test document not found: {TEST_DOCX_PATH}")
        return False
    print(f"✅ Test document found: {TEST_DOCX_PATH}")

    if not _upload_test_document():
        return False

    # An indexing timeout is deliberately non-fatal: the search still runs.
    _wait_for_indexing()

    # Test search for "bee"
    print("\n🔍 Testing search for 'bee'...")
    try:
        headers = {
            'Content-Type': 'application/json',
            'X-API-Key': API_KEY,
        }
        search_response = _post_search("bee", 10, headers, timeout=30)

        if search_response.status_code != 200:
            print(f"❌ Search failed: {search_response.status_code} - {search_response.text}")
            return False

        results = search_response.json().get('results', [])
        print(f"✅ Search completed, found {len(results)} results")

        if _report_search_results(results):
            print("\n✅ SUCCESS: Bee classification is searchable in Web UI!")
            return True

        print("\n❌ Bee classification not found in search results")
        print("This might indicate:")
        print("- Classification metadata not properly indexed")
        print("- Search query needs adjustment")
        print("- Indexing may not have completed")

        _report_alternative_queries(headers)
        return False

    except Exception as e:
        # Top-level boundary of the search phase: report and fail the test.
        print(f"❌ Search error: {e}")
        return False


def _server_is_healthy():
    """Return True when GET /health answers with HTTP 200."""
    try:
        response = requests.get(f"{LIGHTRAG_URL}/health", timeout=10)
    except Exception as e:
        print(f"❌ Cannot connect to LightRAG server: {e}")
        return False

    if response.status_code != 200:
        print("❌ LightRAG server is not responding")
        return False
    print("✅ LightRAG server is running")
    return True


def _upload_test_document(timeout=60):
    """Upload TEST_DOCX_PATH to /documents/upload; return True on HTTP 200.

    ``timeout`` bounds the request so a stalled server cannot hang the
    script indefinitely (requests applies no timeout by default).
    """
    print("\n📤 Uploading test document...")
    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    try:
        with open(TEST_DOCX_PATH, 'rb') as f:
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files={'file': (TEST_DOCX_PATH, f, docx_mime)},
                headers={'X-API-Key': API_KEY},
                timeout=timeout,
            )

        if response.status_code != 200:
            print(f"❌ Upload failed: {response.status_code} - {response.text}")
            return False

        track_id = response.json().get('track_id')
        print(f"✅ Document uploaded successfully, track_id: {track_id}")
        return True
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False


def _wait_for_indexing(max_wait_time=120, wait_interval=5, request_timeout=10):
    """Poll /documents/pipeline_status until it reports not busy.

    Uses a monotonic deadline so that time spent inside slow status
    requests counts against ``max_wait_time`` (seconds), not just the
    sleeps between polls.

    Returns:
        bool: True if the pipeline reported ``busy`` as false before the
        deadline, False if the wait timed out.
    """
    # NOTE(review): a not-busy answer right after the upload is taken as
    # "indexing completed"; whether the server can report idle before it
    # has picked up the new document is not visible from this script.
    print("\n⏳ Waiting for indexing to complete...")
    headers = {'X-API-Key': API_KEY}
    deadline = time.monotonic() + max_wait_time

    while time.monotonic() < deadline:
        try:
            # Check pipeline status
            status_response = requests.get(
                f"{LIGHTRAG_URL}/documents/pipeline_status",
                headers=headers,
                timeout=request_timeout,
            )

            if status_response.status_code == 200:
                status_data = status_response.json()
                if not status_data.get('busy', False):
                    print("✅ Indexing completed")
                    return True
                job_name = status_data.get('job_name', 'Unknown')
                print(f"⏳ Still indexing... ({job_name})")
            else:
                print(f"⚠️ Could not get pipeline status: {status_response.status_code}")

        except Exception as e:
            # Best-effort polling: a failed status check is reported and retried.
            print(f"⚠️ Error checking pipeline status: {e}")

        time.sleep(wait_interval)

    print("❌ Indexing timeout - proceeding with search anyway")
    return False


def _post_search(query, top_k, headers, timeout):
    """POST a "local"-mode query to /search and return the raw response."""
    payload = {
        "query": query,
        "top_k": top_k,
        "mode": "local",
    }
    return requests.post(
        f"{LIGHTRAG_URL}/search",
        json=payload,
        headers=headers,
        timeout=timeout,
    )


def _report_search_results(results):
    """Print every result and return True if any mentions the bee classification."""
    bee_found = False
    for i, result in enumerate(results):
        # Tolerate null/missing fields instead of failing the whole search.
        content = result.get('content') or ''
        score = result.get('score', 0)
        source = result.get('source', 'Unknown')

        # Only apply the float format to numbers; print anything else as-is.
        score_text = f"{score:.4f}" if isinstance(score, (int, float)) else str(score)
        print(f"\nResult {i+1} (Score: {score_text}, Source: {source}):")
        print(f"Content preview: {content[:200]}...")

        # Check if bee classification is in the content
        lowered = content.lower()
        if 'bee' in lowered or 'classification' in lowered:
            bee_found = True
            print("🎯 BEE CLASSIFICATION DETECTED IN SEARCH RESULT!")
    return bee_found


def _report_alternative_queries(headers):
    """Run fallback queries and print up to two content previews for each."""
    print("\n🔍 Trying alternative search queries...")
    for alt_query in ("classification", "image", "photo", "clipart"):
        alt_response = _post_search(alt_query, 5, headers, timeout=10)
        if alt_response.status_code != 200:
            continue

        alt_results = alt_response.json().get('results', [])
        if not alt_results:
            print(f"Query '{alt_query}': No results")
            continue

        print(f"Query '{alt_query}': Found {len(alt_results)} results")
        for result in alt_results[:2]:  # Show first 2 results
            content_preview = (result.get('content') or '')[:150]
            print(f" - {content_preview}...")
|
|
|
|
def check_document_status():
    """Print a per-status summary of the documents reported by the server.

    Queries GET /documents and, for each entry under the response's
    ``statuses`` key, prints the document count followed by the file path
    and ID of the first three documents. Failures are printed, not raised.

    Returns:
        None
    """
    print("\n📊 Checking document status...")
    try:
        # Bounded request: requests applies no timeout by default, so a
        # stalled server would otherwise hang the script here.
        response = requests.get(
            f"{LIGHTRAG_URL}/documents",
            headers={'X-API-Key': API_KEY},
            timeout=10,
        )

        if response.status_code != 200:
            print(f"❌ Could not get document status: {response.status_code}")
            return

        statuses = response.json().get('statuses', {})
        for status, docs in statuses.items():
            print(f"{status}: {len(docs)} documents")
            for doc in docs[:3]:  # Show first 3 documents of each status
                print(f" - {doc.get('file_path', 'Unknown')} (ID: {doc.get('id', 'Unknown')})")

    except Exception as e:
        # Diagnostic helper only: report the problem and carry on.
        print(f"❌ Error checking document status: {e}")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the search test, print the document status
    # for context, then report the verdict.
    print("Web UI Bee Search Test")
    print("This test verifies that bee classification is searchable through the Web UI")
    print("Make sure LightRAG server is running on port 3015")
    print()

    success = test_webui_search()
    check_document_status()

    if success:
        print("\n🎉 TEST PASSED: Bee classification is successfully searchable in Web UI!")
    else:
        print("\n💥 TEST FAILED: Bee classification is not searchable in Web UI")
        print("\nTroubleshooting steps:")
        print("1. Check that LightRAG server is running on port 3015")
        print("2. Verify the document processor is using our custom implementation")
        print("3. Check if the test.docx file contains the bee image")
        print("4. Verify that classification metadata is being added to the content")
        print("5. Check LightRAG logs for any processing errors")
        # Exit non-zero so batch files / CI can detect the failure; without
        # this the script reports success to the shell even when it failed.
        raise SystemExit(1)