Files
railseek6/test_final_workflow.py

270 lines
9.1 KiB
Python

"""
Final Test Workflow for Document Processing Pipeline
Tests the complete workflow with enhanced entity extraction for bee classification
"""
import os
import sys
import time
import requests
import json
from pathlib import Path
# Configuration
# Every setting can be overridden through an environment variable so the
# script can target another server, key or document without editing the
# file; the fallbacks are the values that used to be hard-coded here.
# A trailing slash is stripped from the URL because the endpoints below are
# built as f"{LIGHTRAG_URL}/...".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3016").rstrip("/")
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
TEST_FILE = os.environ.get("LIGHTRAG_TEST_FILE", "test.docx")
# Header dict passed to the authenticated requests in this script.
HEADERS = {"X-API-Key": API_KEY}
def check_server_status():
    """Report whether the LightRAG root endpoint answers with HTTP 200.

    Any exception raised while probing the server is printed and treated
    as "not running".

    Returns:
        bool: True for an HTTP 200 reply, False for any other status code
        or when the request raised.
    """
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=10)
        is_up = reply.status_code == 200
    except Exception as err:
        print(f"❌ Server not reachable: {err}")
        is_up = False
    return is_up
def upload_document():
    """Send TEST_FILE to the server's /documents/upload endpoint.

    The file is posted as a multipart form field named 'file' with the
    .docx MIME type. Progress and failures are printed.

    Returns:
        bool: True when the server replied with HTTP 200; False when the
        file is missing, the server replied with another status code, or
        any exception was raised along the way.
    """
    print(f"📤 Uploading {TEST_FILE} to LightRAG...")

    # Guard clause: nothing to upload.
    if not os.path.exists(TEST_FILE):
        print(f"❌ Test file {TEST_FILE} not found")
        return False

    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    try:
        with open(TEST_FILE, 'rb') as handle:
            reply = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files={'file': (TEST_FILE, handle, docx_mime)},
                headers=HEADERS,
                timeout=30,
            )

        if reply.status_code != 200:
            print(f"❌ Upload failed: {reply.status_code} - {reply.text}")
            return False

        print("✅ Document uploaded successfully")
        payload = reply.json()
        print(f" Upload result: {payload}")
        return True
    except Exception as err:
        print(f"❌ Upload error: {err}")
        return False
def wait_for_processing(max_wait=60, poll_interval=5):
    """Poll the document listing until no listed document is still in flight.

    The /documents endpoint is queried up to ``max_wait // poll_interval``
    times (but always at least once). Each listed document and its status
    is printed. Polling stops successfully as soon as the listing is
    non-empty and none of its entries reports a pending/processing status.

    Args:
        max_wait: Approximate upper bound, in seconds, on the total wait.
        poll_interval: Seconds to sleep between two polls. Must be > 0.

    Returns:
        bool: True once processing looks finished, False on timeout.

    Raises:
        ValueError: If ``poll_interval`` is not positive.
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    print("⏳ Waiting for document processing...")

    # NOTE(review): the status vocabulary is an assumption (LightRAG reports
    # "pending" / "processing" / "processed" / "failed") - confirm against
    # the server. Entries without a status are treated as finished.
    in_flight = {"pending", "processing"}

    # Always probe at least once, even when max_wait < poll_interval.
    attempts = max(1, int(max_wait // poll_interval))

    for attempt in range(attempts):
        try:
            # Check documents status
            response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
            if response.status_code == 200:
                # NOTE(review): assumes the payload is a list of dict
                # records carrying 'filename' and 'status' - verify.
                documents = response.json()
                if documents:
                    print(f"📄 Found {len(documents)} documents in system")
                    still_running = False
                    for doc in documents:
                        status = doc.get('status', 'Unknown')
                        print(f" - {doc.get('filename', 'Unknown')}: {status}")
                        if str(status).lower() in in_flight:
                            still_running = True
                    if not still_running:
                        return True
        except Exception as e:
            print(f"⚠️ Status check error: {e}")

        # No point in sleeping after the last probe: just report the timeout.
        if attempt < attempts - 1:
            time.sleep(poll_interval)

    print("❌ Timeout waiting for processing")
    return False
def search_for_bee():
    """Query the /search endpoint for "bee" and look for a matching hit.

    Every returned hit is printed (score, first 200 characters of content,
    and metadata when present). The search counts as successful when at
    least one hit's lower-cased content contains "bee" or "classification".

    Returns:
        bool: True when such a hit exists; False when there are no hits,
        none matches, the server replied with a non-200 status code, or
        any exception was raised.
    """
    print("🔍 Searching for 'bee' in documents...")
    try:
        reply = requests.post(
            f"{LIGHTRAG_URL}/search",
            json={"query": "bee", "top_k": 10, "mode": "local"},
            headers=HEADERS,
            timeout=30,
        )
        if reply.status_code != 200:
            print(f"❌ Search failed: {reply.status_code} - {reply.text}")
            return False

        body = reply.json()
        print("✅ Search completed successfully")

        if not (body and "results" in body and body["results"]):
            print("❌ No search results found")
            return False

        hits = body["results"]
        print(f"📊 Found {len(hits)} results for 'bee':")
        for rank, hit in enumerate(hits, start=1):
            print(f" {rank}. Score: {hit.get('score', 0):.4f}")
            print(f" Content: {hit.get('content', '')[:200]}...")
            if "metadata" in hit:
                print(f" Metadata: {hit.get('metadata', {})}")
            print()

        # Check if we found bee classification
        lowered = (hit.get("content", "").lower() for hit in hits)
        if any("bee" in text or "classification" in text for text in lowered):
            print("🎉 SUCCESS: Bee classification found in search results!")
            return True

        print("❌ Bee classification not found in search results")
        return False
    except Exception as err:
        print(f"❌ Search error: {err}")
        return False
def search_for_entities():
    """Run a fixed list of entity-related queries against /search.

    Each term is searched independently; a failure for one term (non-200
    reply, empty result list, or an exception) is printed and does not stop
    the remaining terms. Every hit that is printed is also collected.

    Returns:
        bool: True when at least one hit was collected across all terms.
    """
    print("🔍 Searching for entity-related terms...")
    search_terms = (
        "bee image classification",
        "insect",
        "animal",
        "photo of a bee",
        "Entity: Bee",
    )
    collected = []
    for term in search_terms:
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={"query": term, "top_k": 5, "mode": "local"},
                headers=HEADERS,
                timeout=30,
            )
            if reply.status_code != 200:
                print(f"❌ Search for '{term}' failed: {reply.status_code}")
                continue

            body = reply.json()
            if not (body and "results" in body and body["results"]):
                print(f"❌ No results for '{term}'")
                continue

            hits = body["results"]
            print(f"✅ Found {len(hits)} results for '{term}':")
            for hit in hits:
                print(f" - Score: {hit.get('score', 0):.4f}")
                print(f" Content: {hit.get('content', '')[:150]}...")
                # Collected one by one so hits printed before a mid-loop
                # error are still counted.
                collected.append(hit)
        except Exception as err:
            print(f"❌ Search for '{term}' error: {err}")
    return bool(collected)
def test_web_ui():
    """Probe the /webui page and report whether it answers with HTTP 200.

    The request is sent without the API-key header.

    Returns:
        bool: True for an HTTP 200 reply; False for any other status code
        or when the request raised.
    """
    print("🌐 Testing Web UI access...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/webui", timeout=10)
        reachable = reply.status_code == 200
        if reachable:
            print("✅ Web UI is accessible")
        else:
            print(f"❌ Web UI not accessible: {reply.status_code}")
        return reachable
    except Exception as err:
        print(f"❌ Web UI test error: {err}")
        return False
def main():
    """Run the end-to-end document processing workflow test.

    Steps: probe the server, upload the test document, wait for processing,
    run the bee search, run the entity searches, probe the Web UI, and
    finally print a summary in which every line is marked with the outcome
    of the corresponding check.

    Returns:
        bool: True when the bee search found a match. False when it did
        not, or when the initial server probe or the upload failed (the
        remaining steps are skipped in that case).
    """
    print("=" * 60)
    print("🚀 FINAL DOCUMENT PROCESSING WORKFLOW TEST")
    print("=" * 60)
    print(f"📡 Server: {LIGHTRAG_URL}")
    # NOTE(review): this writes the API key to stdout in clear text.
    print(f"🔑 API Key: {API_KEY}")
    print(f"📄 Test File: {TEST_FILE}")
    print()

    # Step 1: Check server status
    print("1. Checking server status...")
    if not check_server_status():
        print("❌ Cannot proceed - server not running")
        return False

    # Step 2: Upload document
    print("\n2. Uploading document...")
    uploaded = upload_document()
    if not uploaded:
        return False

    # Step 3: Wait for processing (a timeout is not fatal; the searches
    # below are still attempted).
    print("\n3. Waiting for document processing...")
    if not wait_for_processing():
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 4: Search for bee
    print("\n4. Testing search functionality...")
    bee_found = search_for_bee()

    # Step 5: Search for entities
    print("\n5. Testing entity search...")
    entities_found = search_for_entities()

    # Step 6: Test Web UI
    print("\n6. Testing Web UI...")
    webui_accessible = test_web_ui()

    # Final results
    print("\n" + "=" * 60)
    print("📊 TEST RESULTS SUMMARY")
    print("=" * 60)
    # The server is probed again so the summary reflects its state after
    # the workflow ran, not only before it.
    server_ok = check_server_status()
    summary = (
        ("Server Status", server_ok, "OK", "FAILED"),
        ("Document Upload", uploaded, "SUCCESS", "FAILED"),
        ("Bee Search", bee_found, "FOUND", "NOT FOUND"),
        ("Entity Search", entities_found, "FOUND", "NOT FOUND"),
        ("Web UI", webui_accessible, "ACCESSIBLE", "INACCESSIBLE"),
    )
    for label, passed, good, bad in summary:
        # The marker follows the outcome instead of always being a check mark.
        marker = "✅" if passed else "❌"
        print(f"{marker} {label}: {good if passed else bad}")

    if bee_found:
        print("\n🎉 SUCCESS: Enhanced document processing with entity extraction is working!")
        print(" Bee classification should now be searchable in the Web UI")
    else:
        print("\n❌ ISSUE: Bee classification not found in search results")
        print(" This may indicate that the enhanced entity extraction needs further tuning")

    print("\n💡 Next steps:")
    # Built from LIGHTRAG_URL so the hint always points at the tested server.
    print(f" - Open the Web UI at {LIGHTRAG_URL}/webui")
    print(" - Search for 'bee' to verify classification appears")
    print(" - Check that the first image is recognized as a bee")
    return bee_found
if __name__ == "__main__":
    # Propagate the workflow outcome to the shell: 0 on success, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)