270 lines
9.1 KiB
Python
270 lines
9.1 KiB
Python
"""
|
|
Final Test Workflow for Document Processing Pipeline
|
|
Tests the complete workflow with enhanced entity extraction for bee classification
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import requests
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# Configuration
#
# Every setting can be overridden through an environment variable so the
# script can target another server, key, or document without editing the
# source. The defaults are the values this script has always used.

# Base URL of the LightRAG server. A trailing slash is stripped because the
# request URLs are built as f"{LIGHTRAG_URL}/<path>".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3016").rstrip("/")

# NOTE(review): the fallback below is a credential committed to source.
# Prefer supplying LIGHTRAG_API_KEY from the environment and rotating this key.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")

# Path of the document uploaded by the workflow.
TEST_FILE = os.environ.get("LIGHTRAG_TEST_FILE", "test.docx")

# Header sent with the API requests made by this script.
HEADERS = {"X-API-Key": API_KEY}
|
|
|
|
def check_server_status():
    """Probe the LightRAG root URL and report whether it answered HTTP 200.

    Returns:
        bool: True when the GET request to ``LIGHTRAG_URL/`` returns status
        200; False for any other status or when the request raises (the
        error is printed).
    """
    try:
        status = requests.get(
            f"{LIGHTRAG_URL}/",
            headers=HEADERS,
            timeout=10,
        ).status_code
    except Exception as err:
        print(f"❌ Server not reachable: {err}")
        return False
    return status == 200
|
|
|
|
def upload_document():
    """Send ``TEST_FILE`` to the ``/documents/upload`` endpoint.

    The file is posted as a multipart form field named ``file`` with the
    Word (.docx) MIME type.

    Returns:
        bool: True when the server answers HTTP 200 and its JSON body could
        be decoded; False when the file is missing, the server answers with
        another status, or any exception is raised (each case is printed).
    """
    print(f"📤 Uploading {TEST_FILE} to LightRAG...")

    # Bail out early rather than letting open() fail inside the try block.
    if not os.path.exists(TEST_FILE):
        print(f"❌ Test file {TEST_FILE} not found")
        return False

    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

    try:
        # The handle must stay open while requests streams the upload.
        with open(TEST_FILE, 'rb') as handle:
            reply = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files={'file': (TEST_FILE, handle, docx_mime)},
                headers=HEADERS,
                timeout=30,
            )

        if reply.status_code != 200:
            print(f"❌ Upload failed: {reply.status_code} - {reply.text}")
            return False

        print("✅ Document uploaded successfully")
        print(f" Upload result: {reply.json()}")
        return True

    except Exception as err:
        print(f"❌ Upload error: {err}")
        return False
|
|
|
|
def wait_for_processing(max_wait=60, poll_interval=5):
    """Poll the ``/documents`` endpoint until it lists at least one document.

    Args:
        max_wait: Approximate upper bound, in seconds, on the time spent
            polling. At least one poll is always made, even when this is
            smaller than ``poll_interval``.
        poll_interval: Seconds to sleep between polls. Must be positive.

    Returns:
        bool: True as soon as the endpoint answers HTTP 200 with a non-empty
        JSON body; False when every poll failed or came back empty.

    Raises:
        ValueError: If ``poll_interval`` is not positive.
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    print("⏳ Waiting for document processing...")

    # Always poll at least once; max_wait // poll_interval alone would be 0
    # for a short max_wait and report a timeout without ever asking.
    attempts = max(1, int(max_wait // poll_interval))

    for attempt in range(attempts):
        try:
            # Check documents status
            response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
            if response.status_code == 200:
                documents = response.json()
                if documents:
                    print(f"📄 Found {len(documents)} documents in system")
                    for doc in documents:
                        print(f" - {doc.get('filename', 'Unknown')}: {doc.get('status', 'Unknown')}")

                    # NOTE(review): the per-document status is only printed,
                    # never checked. Any listed document is treated as
                    # "processing complete" - confirm the server's status
                    # values before tightening this.
                    return True
        except Exception as e:
            # Best-effort poll: report the problem and try again.
            print(f"⚠️ Status check error: {e}")

        # Sleeping after the final poll would only delay the timeout report.
        if attempt < attempts - 1:
            time.sleep(poll_interval)

    print("❌ Timeout waiting for processing")
    return False
|
|
|
|
def search_for_bee():
    """Query the ``/search`` endpoint for "bee" and look for a matching hit.

    Every returned hit is printed (score, first 200 characters of content,
    and metadata when present). A hit counts as a match when its lower-cased
    content contains "bee" or "classification".

    Returns:
        bool: True when at least one hit matches; False when the request
        fails, returns no hits, no hit matches, or an exception is raised.
    """
    print("🔍 Searching for 'bee' in documents...")

    try:
        reply = requests.post(
            f"{LIGHTRAG_URL}/search",
            json={"query": "bee", "top_k": 10, "mode": "local"},
            headers=HEADERS,
            timeout=30,
        )

        if reply.status_code != 200:
            print(f"❌ Search failed: {reply.status_code} - {reply.text}")
            return False

        payload = reply.json()
        print("✅ Search completed successfully")

        # Falsy when the body is empty, lacks "results", or has an empty list.
        hits = payload and "results" in payload and payload["results"]
        if not hits:
            print("❌ No search results found")
            return False

        print(f"📊 Found {len(hits)} results for 'bee':")
        for position, hit in enumerate(hits, start=1):
            print(f" {position}. Score: {hit.get('score', 0):.4f}")
            print(f" Content: {hit.get('content', '')[:200]}...")
            if "metadata" in hit:
                print(f" Metadata: {hit.get('metadata', {})}")
            print()

        # Check if we found bee classification
        matched = any(
            "bee" in text or "classification" in text
            for text in (hit.get("content", "").lower() for hit in hits)
        )
        if not matched:
            print("❌ Bee classification not found in search results")
            return False

        print("🎉 SUCCESS: Bee classification found in search results!")
        return True

    except Exception as err:
        print(f"❌ Search error: {err}")
        return False
|
|
|
|
def search_for_entities():
    """Run several bee-related queries against the ``/search`` endpoint.

    Each term is searched independently; a failure or exception for one term
    is printed and does not stop the remaining terms.

    Returns:
        bool: True when at least one hit was collected across all terms.
    """
    print("🔍 Searching for entity-related terms...")

    queries = (
        "bee image classification",
        "insect",
        "animal",
        "photo of a bee",
        "Entity: Bee",
    )
    collected = []

    for term in queries:
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={"query": term, "top_k": 5, "mode": "local"},
                headers=HEADERS,
                timeout=30,
            )

            if reply.status_code != 200:
                print(f"❌ Search for '{term}' failed: {reply.status_code}")
                continue

            payload = reply.json()
            # Falsy when the body is empty, lacks "results", or has no hits.
            hits = payload and "results" in payload and payload["results"]
            if not hits:
                print(f"❌ No results for '{term}'")
                continue

            print(f"✅ Found {len(hits)} results for '{term}':")
            for hit in hits:
                print(f" - Score: {hit.get('score', 0):.4f}")
                print(f" Content: {hit.get('content', '')[:150]}...")
                collected.append(hit)

        except Exception as err:
            print(f"❌ Search for '{term}' error: {err}")

    return bool(collected)
|
|
|
|
def test_web_ui():
    """Request the ``/webui`` page and report whether it answered HTTP 200.

    Unlike the API calls in this script, this request is sent without the
    API key header.

    Returns:
        bool: True on HTTP 200; False on any other status or on an exception.
    """
    print("🌐 Testing Web UI access...")

    try:
        reply = requests.get(f"{LIGHTRAG_URL}/webui", timeout=10)
        if reply.status_code != 200:
            print(f"❌ Web UI not accessible: {reply.status_code}")
            return False

        print("✅ Web UI is accessible")
        return True
    except Exception as err:
        print(f"❌ Web UI test error: {err}")
        return False
|
|
|
|
def main():
    """Run the full workflow test and print a result summary.

    Steps, in order: server check, document upload, wait for processing,
    "bee" search, entity search, Web UI check. The run aborts early when the
    server is unreachable or the upload fails; a processing timeout only
    prints a warning.

    Returns:
        bool: The result of ``search_for_bee()``; False when the run aborted
        before reaching the search step.
    """
    print("=" * 60)
    print("🚀 FINAL DOCUMENT PROCESSING WORKFLOW TEST")
    print("=" * 60)
    print(f"📡 Server: {LIGHTRAG_URL}")
    # Never echo the credential itself; test output tends to end up in logs.
    print(f"🔑 API Key: {'********' if API_KEY else '(not set)'}")
    print(f"📄 Test File: {TEST_FILE}")
    print()

    # Step 1: Check server status
    print("1. Checking server status...")
    if not check_server_status():
        print("❌ Cannot proceed - server not running")
        return False

    # Step 2: Upload document
    print("\n2. Uploading document...")
    if not upload_document():
        return False

    # Step 3: Wait for processing
    print("\n3. Waiting for document processing...")
    if not wait_for_processing():
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 4: Search for bee
    print("\n4. Testing search functionality...")
    bee_found = search_for_bee()

    # Step 5: Search for entities
    print("\n5. Testing entity search...")
    entities_found = search_for_entities()

    # Step 6: Test Web UI
    print("\n6. Testing Web UI...")
    webui_accessible = test_web_ui()

    # Final results
    print("\n" + "=" * 60)
    print("📊 TEST RESULTS SUMMARY")
    print("=" * 60)

    # Each summary line carries a mark that matches its own outcome.
    mark = {True: "✅", False: "❌"}

    # The server is probed again here, so this line reflects its state at
    # the end of the run rather than the step 1 result.
    server_ok = check_server_status()
    print(f"{mark[bool(server_ok)]} Server Status: {'OK' if server_ok else 'FAILED'}")
    # Reaching this point means upload_document() returned True in step 2.
    print(f"{mark[True]} Document Upload: SUCCESS")
    print(f"{mark[bool(bee_found)]} Bee Search: {'FOUND' if bee_found else 'NOT FOUND'}")
    print(f"{mark[bool(entities_found)]} Entity Search: {'FOUND' if entities_found else 'NOT FOUND'}")
    print(f"{mark[bool(webui_accessible)]} Web UI: {'ACCESSIBLE' if webui_accessible else 'INACCESSIBLE'}")

    if bee_found:
        print("\n🎉 SUCCESS: Enhanced document processing with entity extraction is working!")
        print(" Bee classification should now be searchable in the Web UI")
    else:
        print("\n❌ ISSUE: Bee classification not found in search results")
        print(" This may indicate that the enhanced entity extraction needs further tuning")

    print("\n💡 Next steps:")
    # Derive the hint from the configured server instead of repeating the URL.
    print(f" - Open the Web UI at {LIGHTRAG_URL}/webui")
    print(" - Search for 'bee' to verify classification appears")
    print(" - Check that the first image is recognized as a bee")

    return bee_found
|
|
|
|
if __name__ == "__main__":
|
|
success = main()
|
|
sys.exit(0 if success else 1) |