"""Simple search smoke test.

Bypasses entity-extraction issues and exercises the core document listing
and search endpoints of the server at ``LIGHTRAG_URL`` directly over HTTP.

Each ``test_*`` / ``check_*`` function prints a human-readable report and
returns ``True`` on success, ``False`` on failure; none of them raises.
The process exit status is 0 only when every step succeeded.
"""

# NOTE: json, time and Path are not used below; they are kept because other
# tooling may rely on this module's import list.
import json
import os
import sys
import time
from pathlib import Path

import requests

# Configuration. The URL and key can be overridden through the environment;
# the defaults are the values this script has always used.
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3016")
# NOTE(review): a credential is hard-coded as the fallback value. Prefer
# supplying it via LIGHTRAG_API_KEY and rotating this one.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
TEST_FILE = "test.docx"
HEADERS = {"X-API-Key": API_KEY}


def _post_search(query, top_k):
    """POST a ``local``-mode search for *query* and return the raw response."""
    payload = {"query": query, "top_k": top_k, "mode": "local"}
    return requests.post(
        f"{LIGHTRAG_URL}/search",
        json=payload,
        headers=HEADERS,
        timeout=30,
    )


def test_direct_search():
    """Run one simple search and print every hit.

    Returns:
        True if the server answered with HTTP 200 and the body could be
        read; False on any other status code or on any exception.
    """
    print("🔍 Testing direct search functionality...")

    try:
        # A plain query that does not depend on entity extraction.
        response = _post_search("test", top_k=5)
        if response.status_code != 200:
            print(f"❌ Search failed: {response.status_code} - {response.text}")
            return False

        hits = response.json().get('results', [])
        print("✅ Search completed successfully")
        print(f"📊 Found {len(hits)} results")

        # Print results for debugging.
        for position, hit in enumerate(hits, start=1):
            print(f" {position}. Score: {hit.get('score', 0):.4f}")
            content = hit.get('content', '')
            print(f" Content: {content[:200]}...")
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic step: report, never raise.
        print(f"❌ Search error: {e}")
        return False


def test_documents_endpoint():
    """List the documents the server reports as indexed.

    Returns:
        True if ``/documents`` answered with HTTP 200 and the listing could
        be printed; False otherwise.
    """
    print("📄 Checking documents endpoint...")

    try:
        response = requests.get(
            f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10
        )
        if response.status_code != 200:
            print(
                f"❌ Documents endpoint failed: "
                f"{response.status_code} - {response.text}"
            )
            return False

        # NOTE(review): assumes the body is a JSON list of dicts. Any other
        # shape ends up in the except branch below - confirm against the
        # server's API.
        documents = response.json()
        print(f"✅ Found {len(documents)} documents in system")
        for doc in documents:
            print(
                f" - {doc.get('filename', 'Unknown')}: "
                f"{doc.get('status', 'Unknown')}"
            )
            if 'metadata' in doc:
                print(f" Metadata: {doc.get('metadata', {})}")
        return True
    except Exception as e:
        print(f"❌ Documents endpoint error: {e}")
        return False


def test_health_endpoint():
    """Check that the server root URL answers with HTTP 200.

    Returns:
        True on HTTP 200; False on any other status or on any exception.
    """
    print("🏥 Testing server health...")

    try:
        response = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=10)
        if response.status_code == 200:
            print("✅ Server is healthy")
            return True
        print(f"❌ Server health check failed: {response.status_code}")
        return False
    except Exception as e:
        print(f"❌ Server health error: {e}")
        return False


def check_document_content():
    """Look up ``TEST_FILE`` in the document listing and print its details.

    The detail request is best-effort: once the file is present in the
    listing the check succeeds, and a failed or impossible detail lookup is
    reported separately instead of being reported as "not found".

    This function only prints what the server returns; it does not inspect
    the content for any particular classification.

    Returns:
        True if a listed document's filename contains ``TEST_FILE``; False
        if none does, if the listing request fails, or on any exception.
    """
    print("🔎 Checking document content for bee classification...")

    try:
        # First get all documents.
        response = requests.get(
            f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10
        )
        if response.status_code != 200:
            print(f"❌ Could not get documents: {response.status_code}")
            return False

        for doc in response.json():
            if TEST_FILE not in doc.get('filename', ''):
                continue

            print(f"📄 Found {TEST_FILE}: {doc}")

            # Try to get document details.
            doc_id = doc.get('id')
            if not doc_id:
                print("⚠️ Document has no id - skipping detail lookup")
                return True

            detail_response = requests.get(
                f"{LIGHTRAG_URL}/documents/{doc_id}",
                headers=HEADERS,
                timeout=10,
            )
            if detail_response.status_code == 200:
                print(f"📋 Document details: {detail_response.json()}")
            else:
                print(
                    f"⚠️ Could not get document details: "
                    f"{detail_response.status_code}"
                )
            return True

        print(f"❌ {TEST_FILE} not found in documents")
        return False
    except Exception as e:
        print(f"❌ Document content check error: {e}")
        return False


def test_local_search():
    """Search for several terms in ``local`` mode and flag hits mentioning "bee".

    Every term is tried even if an earlier one fails.

    Returns:
        True if every request answered with HTTP 200 and could be read;
        False if any request failed or raised. A term that yields no
        results is reported but does not count as a failure.
    """
    print("🔍 Testing local search mode...")

    search_terms = ["test", "document", "image", "classification"]
    all_requests_ok = True

    for term in search_terms:
        try:
            response = _post_search(term, top_k=3)
            if response.status_code != 200:
                print(f"❌ Search for '{term}' failed: {response.status_code}")
                all_requests_ok = False
                continue

            hits = response.json().get('results')
            if not hits:
                print(f"❌ No results for '{term}'")
                continue

            print(f"✅ Found {len(hits)} results for '{term}'")
            for hit in hits:
                content = hit.get('content', '')
                if 'bee' in content.lower():
                    print(f"🎉 FOUND BEE IN SEARCH: {content[:200]}...")
        except Exception as e:
            print(f"❌ Search for '{term}' error: {e}")
            all_requests_ok = False

    return all_requests_ok


def main():
    """Run every check in order and report the overall outcome.

    The health check gates the rest: if it fails, nothing else runs.
    All remaining steps always run, so one failure does not hide the
    output of later steps.

    Returns:
        True only if the health check and every later step succeeded.
    """
    print("=" * 50)
    print("🔧 SIMPLE SEARCH TEST")
    print("=" * 50)
    print(f"📡 Server: {LIGHTRAG_URL}")
    print()

    # Test 1: Server health
    print("1. Testing server health...")
    if not test_health_endpoint():
        print("❌ Cannot proceed - server not healthy")
        return False

    failed_steps = []

    # Test 2: Check documents
    print("\n2. Checking documents...")
    if not test_documents_endpoint():
        failed_steps.append("documents")

    # Test 3: Check document content
    print("\n3. Checking document content...")
    if not check_document_content():
        failed_steps.append("document content")

    # Test 4: Simple search
    print("\n4. Testing simple search...")
    if not test_direct_search():
        failed_steps.append("simple search")

    # Test 5: Local search with various terms
    print("\n5. Testing local search with various terms...")
    if not test_local_search():
        failed_steps.append("local search")

    print("\n" + "=" * 50)
    print("📊 SIMPLE TEST COMPLETE")
    print("=" * 50)
    if failed_steps:
        print(f"❌ Failed steps: {', '.join(failed_steps)}")
    print("💡 Next steps:")
    print(" - Check the server logs for document processing details")
    print(" - Verify test.docx was processed with image extraction")
    print(" - Look for 'bee' classification in the processed content")

    return not failed_steps


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)