""" Final Test Workflow for Document Processing Pipeline Tests the complete workflow with enhanced entity extraction for bee classification """ import os import sys import time import requests import json from pathlib import Path # Configuration LIGHTRAG_URL = "http://localhost:3016" API_KEY = "jleu1212" TEST_FILE = "test.docx" HEADERS = {"X-API-Key": API_KEY} def check_server_status(): """Check if LightRAG server is running""" try: response = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=10) return response.status_code == 200 except Exception as e: print(f"āŒ Server not reachable: {e}") return False def upload_document(): """Upload test document to LightRAG""" print(f"šŸ“¤ Uploading {TEST_FILE} to LightRAG...") if not os.path.exists(TEST_FILE): print(f"āŒ Test file {TEST_FILE} not found") return False try: with open(TEST_FILE, 'rb') as f: files = {'file': (TEST_FILE, f, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document')} response = requests.post( f"{LIGHTRAG_URL}/documents/upload", files=files, headers=HEADERS, timeout=30 ) if response.status_code == 200: print("āœ… Document uploaded successfully") result = response.json() print(f" Upload result: {result}") return True else: print(f"āŒ Upload failed: {response.status_code} - {response.text}") return False except Exception as e: print(f"āŒ Upload error: {e}") return False def wait_for_processing(max_wait=60): """Wait for document processing to complete""" print("ā³ Waiting for document processing...") for attempt in range(max_wait // 5): try: # Check documents status response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10) if response.status_code == 200: documents = response.json() if documents: print(f"šŸ“„ Found {len(documents)} documents in system") # Check if our document is processed for doc in documents: print(f" - {doc.get('filename', 'Unknown')}: {doc.get('status', 'Unknown')}") # Assume processing is complete if we see documents return True time.sleep(5) except Exception as e: print(f"āš ļø Status check error: {e}") time.sleep(5) print("āŒ Timeout waiting for processing") return False def search_for_bee(): """Search for bee-related content""" print("šŸ” Searching for 'bee' in documents...") try: search_payload = { "query": "bee", "top_k": 10, "mode": "local" } response = requests.post( f"{LIGHTRAG_URL}/search", json=search_payload, headers=HEADERS, timeout=30 ) if response.status_code == 200: results = response.json() print("āœ… Search completed successfully") if results and "results" in results and results["results"]: print(f"šŸ“Š Found {len(results['results'])} results for 'bee':") for i, result in enumerate(results["results"]): print(f" {i+1}. Score: {result.get('score', 0):.4f}") print(f" Content: {result.get('content', '')[:200]}...") if "metadata" in result: print(f" Metadata: {result.get('metadata', {})}") print() # Check if we found bee classification bee_found = False for result in results["results"]: content = result.get("content", "").lower() if "bee" in content or "classification" in content: bee_found = True break if bee_found: print("šŸŽ‰ SUCCESS: Bee classification found in search results!") return True else: print("āŒ Bee classification not found in search results") return False else: print("āŒ No search results found") return False else: print(f"āŒ Search failed: {response.status_code} - {response.text}") return False except Exception as e: print(f"āŒ Search error: {e}") return False def search_for_entities(): """Search for specific entities related to bee classification""" print("šŸ” Searching for entity-related terms...") search_terms = [ "bee image classification", "insect", "animal", "photo of a bee", "Entity: Bee" ] all_results = [] for term in search_terms: try: search_payload = { "query": term, "top_k": 5, "mode": "local" } response = requests.post( f"{LIGHTRAG_URL}/search", json=search_payload, headers=HEADERS, timeout=30 ) if response.status_code == 200: results = response.json() if results and "results" in results and results["results"]: print(f"āœ… Found {len(results['results'])} results for '{term}':") for result in results["results"]: print(f" - Score: {result.get('score', 0):.4f}") print(f" Content: {result.get('content', '')[:150]}...") all_results.append(result) else: print(f"āŒ No results for '{term}'") else: print(f"āŒ Search for '{term}' failed: {response.status_code}") except Exception as e: print(f"āŒ Search for '{term}' error: {e}") return len(all_results) > 0 def test_web_ui(): """Test Web UI accessibility""" print("🌐 Testing Web UI access...") try: response = requests.get(f"{LIGHTRAG_URL}/webui", timeout=10) if response.status_code == 200: print("āœ… Web UI is accessible") return True else: print(f"āŒ Web UI not accessible: {response.status_code}") return False except Exception as e: print(f"āŒ Web UI test error: {e}") return False def main(): """Main test workflow""" print("=" * 60) print("šŸš€ FINAL DOCUMENT PROCESSING WORKFLOW TEST") print("=" * 60) print(f"šŸ“” Server: {LIGHTRAG_URL}") print(f"šŸ”‘ API Key: {API_KEY}") print(f"šŸ“„ Test File: {TEST_FILE}") print() # Step 1: Check server status print("1. Checking server status...") if not check_server_status(): print("āŒ Cannot proceed - server not running") return False # Step 2: Upload document print("\n2. Uploading document...") if not upload_document(): return False # Step 3: Wait for processing print("\n3. Waiting for document processing...") if not wait_for_processing(): print("āš ļø Processing timeout, but continuing with search...") # Step 4: Search for bee print("\n4. Testing search functionality...") bee_found = search_for_bee() # Step 5: Search for entities print("\n5. Testing entity search...") entities_found = search_for_entities() # Step 6: Test Web UI print("\n6. Testing Web UI...") webui_accessible = test_web_ui() # Final results print("\n" + "=" * 60) print("šŸ“Š TEST RESULTS SUMMARY") print("=" * 60) print(f"āœ… Server Status: {'OK' if check_server_status() else 'FAILED'}") print(f"āœ… Document Upload: {'SUCCESS' if True else 'FAILED'}") print(f"āœ… Bee Search: {'FOUND' if bee_found else 'NOT FOUND'}") print(f"āœ… Entity Search: {'FOUND' if entities_found else 'NOT FOUND'}") print(f"āœ… Web UI: {'ACCESSIBLE' if webui_accessible else 'INACCESSIBLE'}") if bee_found: print("\nšŸŽ‰ SUCCESS: Enhanced document processing with entity extraction is working!") print(" Bee classification should now be searchable in the Web UI") else: print("\nāŒ ISSUE: Bee classification not found in search results") print(" This may indicate that the enhanced entity extraction needs further tuning") print("\nšŸ’” Next steps:") print(" - Open the Web UI at http://localhost:3016/webui") print(" - Search for 'bee' to verify classification appears") print(" - Check that the first image is recognized as a bee") return bee_found if __name__ == "__main__": success = main() sys.exit(0 if success else 1)