"""
Simple Test for Bee Classification in Document Processing

Tests if the enhanced document processor is working without API dependencies.

The server URL and API key default to the values below and can be overridden
through the LIGHTRAG_URL and LIGHTRAG_API_KEY environment variables.
When run as a script the process exits with status 0 if the bee
classification was found and 1 otherwise.
"""

import os
import sys
import time

import requests

# Configuration (environment variables take precedence over the defaults)
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015")
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
HEADERS = {"X-API-Key": API_KEY}


def check_server_status() -> bool:
    """Check if server is running.

    Returns:
        True when ``GET {LIGHTRAG_URL}/`` answers with HTTP 200; False for any
        other status code or when the request raises.
    """
    print("๐Ÿ” CHECKING SERVER STATUS...")
    try:
        response = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
    except Exception as e:
        # Broad on purpose: this is a diagnostic script and any failure here
        # simply means "server not reachable".
        print(f"โŒ Server not accessible: {e}")
        return False

    if response.status_code == 200:
        print("โœ… Server is running")
        return True

    print(f"โŒ Server status: {response.status_code}")
    return False


def check_documents():
    """Check current documents in system.

    Prints one line per document returned by ``GET /documents``.

    Returns:
        The decoded JSON body of the response, or an empty list when the
        request fails, returns a non-200 status, or the body cannot be
        processed.
    """
    print("๐Ÿ“„ CHECKING DOCUMENTS...")
    try:
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print(f"โŒ Failed to get documents: {response.status_code}")
            return []

        documents = response.json()
        print(f"๐Ÿ“Š Found {len(documents)} documents:")
        for doc in documents:
            print(f" - {doc.get('filename', 'Unknown')}: {doc.get('status', 'unknown')}")
        return documents
    except Exception as e:
        print(f"โŒ Error checking documents: {e}")
        return []


def test_simple_search() -> None:
    """Test simple search without complex queries.

    Posts a handful of generic terms to ``/search`` and prints, for each
    query, either the scored result previews or the reason nothing was shown.
    A failure on one query does not stop the remaining queries.
    """
    print("๐Ÿ” TESTING SIMPLE SEARCH...")

    # Test with simple terms that might be in the document
    simple_queries = ("test", "document", "text")

    for query in simple_queries:
        try:
            search_payload = {
                "query": query,
                "top_k": 5,
                "mode": "standard",
            }
            response = requests.post(
                f"{LIGHTRAG_URL}/search",
                json=search_payload,
                headers=HEADERS,
                timeout=10,
            )

            if response.status_code != 200:
                print(f"โŒ '{query}' search failed: {response.status_code}")
                continue

            results = response.json()
            hits = results.get('results')
            if not hits:
                print(f"โŒ '{query}': No results")
                continue

            print(f"โœ… '{query}': Found {len(hits)} results")
            for result in hits:
                content = result.get('content', '')[:100]
                score = result.get('score', 0)
                print(f" Score {score:.4f}: {content}...")
        except Exception as e:
            print(f"โŒ '{query}' search error: {e}")


def check_document_content() -> bool:
    """Check if we can get document content directly.

    Looks through ``GET /documents`` for an entry whose filename contains
    ``test.docx`` (case-insensitive), downloads its content from
    ``/documents/{id}/content`` and checks whether the text mentions "bee".

    Returns:
        True when the first document whose content could be fetched contains
        "bee"; False when it does not, when no matching document yields
        content, or when any request fails.
    """
    print("๐Ÿ“ CHECKING DOCUMENT CONTENT...")

    try:
        # Get documents first
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            # Report the failure instead of silently claiming "not found".
            print(f"โŒ Failed to get documents: {response.status_code}")
            return False

        for doc in response.json():
            # A null filename is treated as empty rather than aborting the scan.
            filename = doc.get('filename') or ''
            if 'test.docx' not in filename.lower():
                continue

            doc_id = doc.get('id')
            print(f"๐Ÿ“„ Found test.docx with ID: {doc_id}")

            # Try to get document content
            try:
                content_response = requests.get(
                    f"{LIGHTRAG_URL}/documents/{doc_id}/content",
                    headers=HEADERS,
                    timeout=10,
                )

                if content_response.status_code != 200:
                    print(f"โŒ Could not get content: {content_response.status_code}")
                    continue

                content = content_response.text
                print("โœ… Document content preview (first 500 chars):")
                print(f" {content[:500]}...")

                # Check for bee-related content
                if 'bee' in content.lower():
                    print("๐ŸŽฏ BEE CLASSIFICATION FOUND IN CONTENT!")
                    return True

                print("โŒ No bee classification found in content")
                return False
            except Exception as e:
                # Keep going: another matching document may still be readable.
                print(f"โŒ Error getting content: {e}")

        return False
    except Exception as e:
        print(f"โŒ Error checking document content: {e}")
        return False


def main() -> bool:
    """Main test function.

    Runs the server check, the document listing, the content check and the
    search probe in that order, then prints a summary.

    Returns:
        True when the bee classification was found in the document content,
        False otherwise (including when the server is not reachable).
    """
    print("๐Ÿงช SIMPLE BEE CLASSIFICATION TEST")
    print("=" * 60)

    # Step 1: Check server status
    if not check_server_status():
        print("โŒ Cannot proceed - server not running")
        return False

    # Step 2: Check current documents (called for its printed listing only)
    check_documents()

    # Step 3: Check if test.docx exists and get its content
    bee_found = check_document_content()

    # Step 4: Test simple search
    test_simple_search()

    print("\n" + "=" * 60)
    print("๐Ÿ“Š TEST RESULTS")
    print("=" * 60)

    if bee_found:
        print("๐ŸŽ‰ SUCCESS: Bee classification found in document content!")
        print(" The enhanced document processor is working correctly.")
        print("\nโœ… TEST PASSED: Bee classification is present in document")
        return True

    print("โŒ ISSUE: Bee classification not found in document content")
    print(" The enhanced processor may not be active or bee not detected")
    print("\n๐Ÿ’ก Next steps:")
    print(" 1. Check server logs for processing details")
    print(" 2. Verify the enhanced document processor is being used")
    print(" 3. Check if OpenCLIP classifier is available")
    print("\nโŒ TEST FAILED: Bee classification not found")
    return False


if __name__ == "__main__":
    success = main()
    if success:
        print("\n๐ŸŽ‰ The bee classification system is working!")
    else:
        print("\nโš ๏ธ Further investigation needed for bee classification.")
    # Propagate the outcome so shells and CI can detect a failed check.
    sys.exit(0 if success else 1)