"""
Test Enhanced Document Processor with Bee Classification.

Uploads test.docx and verifies bee classification is searchable.

The script exits with status 0 when bee-related content is found by at
least one search query and with status 1 otherwise.
"""

import os
import sys
import time

import requests

# Configuration.  The defaults are the values this script has always used;
# the environment variables only exist so the endpoint and credential can be
# overridden without editing the file.
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015")
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
HEADERS = {"X-API-Key": API_KEY}


def clear_existing_documents():
    """Delete every document currently listed by the server.

    Best effort: any error is printed and swallowed so the rest of the test
    can still run.  Assumes ``GET /documents`` returns a JSON list of objects
    carrying an ``id`` key (that is what the checks below rely on); entries
    without an ``id`` are skipped.
    """
    print("🗑️ CLEARING EXISTING DOCUMENTS...")

    try:
        # Get current documents
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print("❌ Could not get documents list")
            return

        documents = response.json()
        if not isinstance(documents, list):
            # Nothing was deleted, so do not claim the store was cleared.
            print("⚠️ Unexpected /documents response format; nothing was deleted")
            return

        failures = 0
        for doc in documents:
            if not isinstance(doc, dict) or 'id' not in doc:
                continue
            doc_id = doc['id']
            delete_response = requests.delete(
                f"{LIGHTRAG_URL}/documents/{doc_id}",
                headers=HEADERS,
                timeout=10
            )
            if delete_response.status_code == 200:
                print(f"✅ Deleted document: {doc.get('filename', 'Unknown')}")
            else:
                failures += 1
                print(f"❌ Failed to delete document: {delete_response.status_code}")

        # Only report success when every delete request actually succeeded.
        if failures:
            print(f"⚠️ {failures} document(s) could not be deleted")
        else:
            print("✅ All documents cleared")

    except Exception as e:
        # Deliberate catch-all: clearing is a best-effort preparation step.
        print(f"❌ Error clearing documents: {e}")


def upload_test_document():
    """Upload ``test.docx`` from the current directory for processing.

    Returns:
        True when the server answers the upload with HTTP 200, False when the
        file is missing, the upload is rejected, or any error occurs.
    """
    print("📤 UPLOADING TEST DOCUMENT WITH ENHANCED PROCESSOR...")

    test_file = "test.docx"
    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    try:
        with open(test_file, 'rb') as f:
            files = {'file': (test_file, f, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=HEADERS,
                timeout=60  # Longer timeout for processing
            )

        if response.status_code == 200:
            print("✅ Document uploaded successfully")
            result = response.json()
            print(f" Upload result: {result}")
            return True

        print(f"❌ Upload failed: {response.status_code} - {response.text}")
        return False

    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False


def wait_for_processing(max_attempts=20, poll_interval=6):
    """Poll the document list until ``test.docx`` reports status ``processed``.

    Args:
        max_attempts: Number of polls before giving up.
        poll_interval: Seconds to sleep between polls.  With the defaults the
            wait is roughly two minutes plus request time.

    Returns:
        True as soon as a matching document has status ``'processed'``,
        False when all attempts are exhausted.
    """
    print("⏳ WAITING FOR DOCUMENT PROCESSING...")

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
            if response.status_code == 200:
                documents = response.json()
                if isinstance(documents, list):
                    for doc in documents:
                        if not isinstance(doc, dict):
                            continue
                        # A null filename must not abort the scan of the
                        # remaining documents.
                        filename = str(doc.get('filename') or '')
                        if 'test.docx' in filename.lower():
                            status = doc.get('status', 'unknown')
                            print(f"📄 Document status: {status}")
                            if status == 'processed':
                                print("✅ Document processing completed")
                                return True
        except Exception as e:
            print(f"⚠️ Status check error: {e}")

        # No point sleeping after the final poll; just report the timeout.
        if attempt < max_attempts:
            time.sleep(poll_interval)

    print("❌ Timeout waiting for processing")
    return False


def test_bee_search():
    """Run a set of search queries and look for bee-related content.

    A result counts as a hit when its lower-cased ``content`` contains
    ``'bee'`` or ``'classification'``.

    Returns:
        True if any result of any query is a hit, False otherwise.
    """
    print("🔍 TESTING BEE SEARCH...")

    search_queries = [
        "bee",
        "Bee",
        "classification",
        "photo of a bee",
        "Entity: Bee",
        "insect",
        "animal",
    ]

    bee_found = False

    for query in search_queries:
        try:
            search_payload = {
                "query": query,
                "top_k": 10,
                "mode": "hybrid"  # Use hybrid mode which worked in diagnostics
            }

            response = requests.post(
                f"{LIGHTRAG_URL}/search",
                json=search_payload,
                headers=HEADERS,
                timeout=30
            )

            if response.status_code != 200:
                print(f"❌ '{query}' search failed: {response.status_code}")
                continue

            results = response.json()
            hits = results.get('results') if isinstance(results, dict) else None
            if not hits:
                print(f"❌ '{query}': No results")
                continue

            print(f"✅ '{query}': Found {len(hits)} results")

            # Check if any result contains bee-related content
            for result in hits:
                if not isinstance(result, dict):
                    continue
                # Null content / non-numeric score must not discard the
                # remaining results of this query.
                content = str(result.get('content') or '').lower()
                if 'bee' in content or 'classification' in content:
                    score = result.get('score', 0)
                    if isinstance(score, (int, float)):
                        score_text = f"{score:.4f}"
                    else:
                        score_text = str(score)
                    print(f"🎯 BEE FOUND: Score {score_text}")
                    print(f" Content: {content[:200]}...")
                    bee_found = True

        except Exception as e:
            print(f"❌ '{query}' search error: {e}")

    return bee_found


def main():
    """Run the full scenario: clear, upload, wait, search, report.

    Returns:
        True when bee-related content was found by the search step, False
        when the upload failed or nothing bee-related was found.
    """
    print("🧪 TESTING ENHANCED DOCUMENT PROCESSOR")
    print("=" * 60)

    # Step 1: Clear existing documents
    clear_existing_documents()

    # Step 2: Upload test document
    if not upload_test_document():
        print("❌ Document upload failed")
        return False

    # Step 3: Wait for processing
    if not wait_for_processing():
        # A timeout is not fatal: the search below is still attempted.
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 4: Test bee search
    bee_found = test_bee_search()

    print("\n" + "=" * 60)
    print("📊 TEST RESULTS")
    print("=" * 60)

    if bee_found:
        print("🎉 SUCCESS: Bee classification is now searchable!")
        print(" The enhanced document processor is working correctly.")
        print(" The Web UI should now detect bee classification.")
    else:
        print("❌ ISSUE: Bee classification still not searchable")
        print(" There may be an issue with the enhanced processor")
        print(" or the image classification is not running.")

    print("\n💡 Next steps:")
    print(f" 1. Open the Web UI at {LIGHTRAG_URL}/webui")
    print(" 2. Search for 'bee' to verify classification appears")

    if bee_found:
        print("\n✅ TEST PASSED: Web UI should now detect bee classification")
        return True

    print("\n❌ TEST FAILED: Further investigation needed")
    return False


if __name__ == "__main__":
    success = main()
    if success:
        print("\n🎉 The enhanced document processor is working correctly!")
    else:
        print("\n⚠️ The enhanced document processor needs investigation.")
    # Propagate the outcome so shells and CI jobs can detect a failed run.
    sys.exit(0 if success else 1)