#!/usr/bin/env python3
"""
Test script to verify Web UI search functionality for bee classification.

This tests the complete pipeline including LightRAG's document processing:
health check, document upload, waiting for the indexing pipeline, and a
search for "bee" whose results are inspected for classification text.
"""

import json
import os
import time
from pathlib import Path

import requests

# Configuration
LIGHTRAG_URL = "http://localhost:3015"
TEST_DOCX_PATH = "test.docx"
# NOTE(review): the credential is hard-coded; consider loading it from the
# environment if this script is ever shared outside the local setup.
API_KEY = "jleu1212"  # Correct API key from zrun.bat

# MIME type sent with the .docx upload.
DOCX_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

# requests never times out by default; without explicit limits a stalled
# server would hang the script forever and defeat the indexing wait budget.
REQUEST_TIMEOUT = 10  # seconds, for small status/listing calls
UPLOAD_TIMEOUT = 60  # seconds, for the multipart document upload


def _auth_headers():
    """Return the API-key header sent to the authenticated endpoints."""
    return {'X-API-Key': API_KEY}


def _server_is_healthy():
    """Return True when GET /health answers with HTTP 200."""
    try:
        response = requests.get(f"{LIGHTRAG_URL}/health", timeout=10)
        if response.status_code != 200:
            print("❌ LightRAG server is not responding")
            return False
        print("✅ LightRAG server is running")
        return True
    except Exception as e:
        print(f"❌ Cannot connect to LightRAG server: {e}")
        return False


def _upload_test_document():
    """Upload TEST_DOCX_PATH to /documents/upload; return True on HTTP 200."""
    print("\n📤 Uploading test document...")
    try:
        # The POST must happen while the file handle is still open, because
        # requests streams the multipart body from it.
        with open(TEST_DOCX_PATH, 'rb') as f:
            files = {'file': (TEST_DOCX_PATH, f, DOCX_MIME_TYPE)}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=_auth_headers(),
                timeout=UPLOAD_TIMEOUT,
            )

        if response.status_code == 200:
            track_id = response.json().get('track_id')
            print(f"✅ Document uploaded successfully, track_id: {track_id}")
            return True

        print(f"❌ Upload failed: {response.status_code} - {response.text}")
        return False
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False


def _wait_for_indexing(max_wait_time=120, wait_interval=5):
    """Poll /documents/pipeline_status until it reports not busy.

    Returns True once the pipeline reports ``busy`` as false, or False when
    ``max_wait_time`` seconds of sleeping have elapsed. Errors while polling
    are reported and treated as "still waiting" (best effort).
    """
    print("\n⏳ Waiting for indexing to complete...")
    waited = 0

    while waited < max_wait_time:
        try:
            status_response = requests.get(
                f"{LIGHTRAG_URL}/documents/pipeline_status",
                headers=_auth_headers(),
                timeout=REQUEST_TIMEOUT,
            )
            if status_response.status_code == 200:
                status_data = status_response.json()
                if not status_data.get('busy', False):
                    print("✅ Indexing completed")
                    return True
                job_name = status_data.get('job_name', 'Unknown')
                print(f"⏳ Still indexing... ({job_name})")
            else:
                print(f"⚠️ Could not get pipeline status: {status_response.status_code}")
        except Exception as e:
            print(f"⚠️ Error checking pipeline status: {e}")

        time.sleep(wait_interval)
        waited += wait_interval

    print("❌ Indexing timeout - proceeding with search anyway")
    return False


def _report_results(results):
    """Print every search result; return True if any mentions the bee class."""
    bee_found = False
    for i, result in enumerate(results):
        content = result.get('content', '')
        score = result.get('score', 0)
        source = result.get('source', 'Unknown')

        print(f"\nResult {i+1} (Score: {score:.4f}, Source: {source}):")
        print(f"Content preview: {content[:200]}...")

        # Check if bee classification is in the content
        lowered = content.lower()
        if 'bee' in lowered or 'classification' in lowered:
            bee_found = True
            print("🎯 BEE CLASSIFICATION DETECTED IN SEARCH RESULT!")
    return bee_found


def _try_alternative_queries(headers):
    """Run a few fallback queries and print a short preview of each hit list."""
    print("\n🔍 Trying alternative search queries...")
    alternative_queries = ["classification", "image", "photo", "clipart"]

    for alt_query in alternative_queries:
        alt_payload = {
            "query": alt_query,
            "top_k": 5,
            "mode": "local",
        }
        alt_response = requests.post(
            f"{LIGHTRAG_URL}/search",
            json=alt_payload,
            headers=headers,
            timeout=10,
        )

        # Non-200 answers are skipped silently; this pass is diagnostic only.
        if alt_response.status_code == 200:
            alt_results = alt_response.json().get('results', [])
            if alt_results:
                print(f"Query '{alt_query}': Found {len(alt_results)} results")
                for result in alt_results[:2]:  # Show first 2 results
                    content_preview = result.get('content', '')[:150]
                    print(f"  - {content_preview}...")
            else:
                print(f"Query '{alt_query}': No results")


def _search_for_bee():
    """Search for "bee" and return True if a result mentions the classification."""
    print("\n🔍 Testing search for 'bee'...")
    try:
        search_payload = {
            "query": "bee",
            "top_k": 10,
            "mode": "local",
        }
        headers = {
            'Content-Type': 'application/json',
            'X-API-Key': API_KEY,
        }

        search_response = requests.post(
            f"{LIGHTRAG_URL}/search",
            json=search_payload,
            headers=headers,
            timeout=30,
        )

        if search_response.status_code != 200:
            print(f"❌ Search failed: {search_response.status_code} - {search_response.text}")
            return False

        results = search_response.json().get('results', [])
        print(f"✅ Search completed, found {len(results)} results")

        if _report_results(results):
            print("\n✅ SUCCESS: Bee classification is searchable in Web UI!")
            return True

        print("\n❌ Bee classification not found in search results")
        print("This might indicate:")
        print("- Classification metadata not properly indexed")
        print("- Search query needs adjustment")
        print("- Indexing may not have completed")

        _try_alternative_queries(headers)
        return False
    except Exception as e:
        print(f"❌ Search error: {e}")
        return False


def test_webui_search():
    """Test complete Web UI search pipeline for bee classification.

    Steps: health check, presence of the test document, upload, wait for the
    indexing pipeline (a timeout here is reported but does not abort), then
    search for "bee".

    Returns:
        True if a search result contains "bee" or "classification",
        False on any failure along the way.
    """
    print("🧪 Testing Web UI Bee Search Pipeline")
    print("=" * 50)

    # Check if LightRAG server is running
    if not _server_is_healthy():
        return False

    # Check if test document exists
    if not os.path.exists(TEST_DOCX_PATH):
        print(f"❌ Test document not found: {TEST_DOCX_PATH}")
        return False
    print(f"✅ Test document found: {TEST_DOCX_PATH}")

    if not _upload_test_document():
        return False

    # A timed-out wait is deliberately non-fatal: the search still runs.
    _wait_for_indexing()

    return _search_for_bee()


def check_document_status():
    """Print the per-status document counts reported by GET /documents."""
    print("\n📊 Checking document status...")
    try:
        response = requests.get(
            f"{LIGHTRAG_URL}/documents",
            headers=_auth_headers(),
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            status_data = response.json()
            statuses = status_data.get('statuses', {})
            for status, docs in statuses.items():
                print(f"{status}: {len(docs)} documents")
                for doc in docs[:3]:  # Show first 3 documents of each status
                    print(f"  - {doc.get('file_path', 'Unknown')} (ID: {doc.get('id', 'Unknown')})")
        else:
            print(f"❌ Could not get document status: {response.status_code}")
    except Exception as e:
        print(f"❌ Error checking document status: {e}")


if __name__ == "__main__":
    print("Web UI Bee Search Test")
    print("This test verifies that bee classification is searchable through the Web UI")
    print("Make sure LightRAG server is running on port 3015")
    print()

    success = test_webui_search()
    check_document_status()

    if success:
        print("\n🎉 TEST PASSED: Bee classification is successfully searchable in Web UI!")
    else:
        print("\n💥 TEST FAILED: Bee classification is not searchable in Web UI")
        print("\nTroubleshooting steps:")
        print("1. Check that LightRAG server is running on port 3015")
        print("2. Verify the document processor is using our custom implementation")
        print("3. Check if the test.docx file contains the bee image")
        print("4. Verify that classification metadata is being added to the content")
        print("5. Check LightRAG logs for any processing errors")