"""
Test Enhanced Document Processor with Bee Classification

Uploads test.docx and verifies that the bee classification is searchable.
"""
|
|
|
|
import requests
|
|
import time
|
|
import os
|
|
|
|
# Configuration
# Both settings can be overridden through environment variables so the script
# can target another deployment without editing the source. The defaults are
# the values this script has always used. A trailing slash is stripped because
# endpoint paths are appended as "/documents", "/search", ...
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015").rstrip("/")
# NOTE(review): a credential committed to source control should be rotated and
# supplied via LIGHTRAG_API_KEY; the literal is kept only as a
# backward-compatible fallback.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
# Header sent with every request made by this script.
HEADERS = {"X-API-Key": API_KEY}
|
|
|
|
def clear_existing_documents():
    """Delete every document the server lists so the upload is processed fresh.

    Fetches ``{LIGHTRAG_URL}/documents`` and issues a DELETE for each listed
    entry that carries an ``id``. The operation is best-effort: every problem
    is reported on stdout, nothing is raised and nothing is returned, so the
    caller can continue regardless of the outcome.
    """
    print("🗑️ CLEARING EXISTING DOCUMENTS...")

    try:
        # Get current documents
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print("❌ Could not get documents list")
            return

        documents = response.json()
        if not isinstance(documents, list):
            # Only a JSON list of entries is understood here; anything else
            # means no document could be identified, so do not claim success.
            print("⚠️ Unexpected documents payload; nothing was deleted")
            return

        failed = 0
        for doc in documents:
            # Skip entries that cannot be addressed by id instead of letting
            # one malformed entry abort the whole clean-up.
            if not isinstance(doc, dict) or 'id' not in doc:
                continue

            doc_id = doc['id']
            delete_response = requests.delete(
                f"{LIGHTRAG_URL}/documents/{doc_id}",
                headers=HEADERS,
                timeout=10,
            )
            if delete_response.status_code == 200:
                print(f"✅ Deleted document: {doc.get('filename', 'Unknown')}")
            else:
                failed += 1
                print(f"❌ Failed to delete document: {delete_response.status_code}")

        # Report success only when every attempted deletion succeeded.
        if failed:
            print(f"⚠️ {failed} document(s) could not be deleted")
        else:
            print("✅ All documents cleared")
    except Exception as e:
        # Deliberately broad: clearing is a best-effort preparation step.
        print(f"❌ Error clearing documents: {e}")
|
|
|
|
def upload_test_document(test_file="test.docx"):
    """Upload a .docx file to the server's ``/documents/upload`` endpoint.

    Args:
        test_file: Path of the file to upload. Defaults to ``"test.docx"`` in
            the current working directory, the file this script has always
            used.

    Returns:
        True if the server answered HTTP 200; False if the file is missing,
        the server answered with any other status, or the request raised.
    """
    print("📤 UPLOADING TEST DOCUMENT WITH ENHANCED PROCESSOR...")

    if not os.path.exists(test_file):
        print(f"❌ Test file {test_file} not found")
        return False

    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

    try:
        with open(test_file, 'rb') as f:
            # Send only the base name so a path argument does not leak the
            # local directory layout into the stored filename.
            files = {'file': (os.path.basename(test_file), f, docx_mime)}
            response = requests.post(
                f"{LIGHTRAG_URL}/documents/upload",
                files=files,
                headers=HEADERS,
                timeout=60,  # Longer timeout for processing
            )
    except Exception as e:
        print(f"❌ Upload error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Upload failed: {response.status_code} - {response.text}")
        return False

    print("✅ Document uploaded successfully")
    try:
        result = response.json()
    except ValueError:
        # The server accepted the upload; a non-JSON body must not turn that
        # into a reported failure, so show the raw text instead.
        result = response.text
    print(f" Upload result: {result}")
    return True
|
|
|
|
def wait_for_processing(max_attempts=20, poll_interval=6, filename="test.docx"):
    """Poll the document list until the uploaded file is reported as processed.

    Args:
        max_attempts: Number of status polls before giving up.
        poll_interval: Seconds to sleep between two polls. With the defaults
            the wait is bounded at roughly two minutes.
        filename: Case-insensitive substring looked for in each listed
            entry's ``filename`` value.

    Returns:
        True as soon as a matching entry has status ``'processed'``; False
        once all polls are used up without seeing that status.
    """
    print("⏳ WAITING FOR DOCUMENT PROCESSING...")

    wanted = filename.lower()

    for attempt in range(max_attempts):
        try:
            response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
            if response.status_code == 200:
                documents = response.json()
                if isinstance(documents, list):
                    for doc in documents:
                        if not isinstance(doc, dict):
                            continue
                        # A null or non-string filename must not raise and
                        # waste the whole poll.
                        listed_name = str(doc.get('filename') or '').lower()
                        if wanted in listed_name:
                            status = doc.get('status', 'unknown')
                            print(f"📄 Document status: {status}")
                            if status == 'processed':
                                print("✅ Document processing completed")
                                return True
        except Exception as e:
            # Any failure of a single poll is reported and retried.
            print(f"⚠️ Status check error: {e}")

        # Sleeping after the final poll would only delay the timeout report.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)

    print("❌ Timeout waiting for processing")
    return False
|
|
|
|
def test_bee_search():
    """Run a fixed set of search queries and look for bee-related content.

    Each query is POSTed to ``{LIGHTRAG_URL}/search`` in hybrid mode. A hit
    counts as bee-related when its lower-cased ``content`` contains ``'bee'``
    or ``'classification'``. Failures of a single query are printed and do not
    stop the remaining queries.

    Returns:
        True if at least one hit of any query was bee-related, else False.

    NOTE(review): because of the ``test_`` prefix, pytest would collect this
    function if the file is picked up as a test module, although it is
    written as a script step that returns a bool.
    """
    print("🔍 TESTING BEE SEARCH...")

    search_queries = [
        "bee",
        "Bee",
        "classification",
        "photo of a bee",
        "Entity: Bee",
        "insect",
        "animal",
    ]

    bee_found = False

    for query in search_queries:
        try:
            search_payload = {
                "query": query,
                "top_k": 10,
                "mode": "hybrid",  # Use hybrid mode which worked in diagnostics
            }

            response = requests.post(
                f"{LIGHTRAG_URL}/search",
                json=search_payload,
                headers=HEADERS,
                timeout=30,
            )

            if response.status_code != 200:
                print(f"❌ '{query}' search failed: {response.status_code}")
                continue

            payload = response.json()
            hits = payload.get('results') if isinstance(payload, dict) else None
            if not hits:
                print(f"❌ '{query}': No results")
                continue

            print(f"✅ '{query}': Found {len(hits)} results")

            # Check if any result contains bee-related content
            for result in hits:
                if not isinstance(result, dict):
                    continue

                # A null content value must not abort the scan of the
                # remaining hits for this query.
                content = str(result.get('content') or '').lower()
                if 'bee' in content or 'classification' in content:
                    score = result.get('score', 0)
                    try:
                        score_text = f"{float(score):.4f}"
                    except (TypeError, ValueError):
                        # Show a non-numeric score as-is rather than failing.
                        score_text = str(score)
                    print(f"🎯 BEE FOUND: Score {score_text}")
                    print(f" Content: {content[:200]}...")
                    bee_found = True

        except Exception as e:
            print(f"❌ '{query}' search error: {e}")

    return bee_found
|
|
|
|
def main():
    """Drive the whole check: clear, upload, wait, search, then report.

    Returns:
        False if the upload fails or no bee-related content is found by the
        search step; True if the search step found bee-related content.
    """
    rule = "=" * 60

    print("🧪 TESTING ENHANCED DOCUMENT PROCESSOR")
    print(rule)

    # Step 1: start from an empty document store
    clear_existing_documents()

    # Step 2: nothing else makes sense without a successful upload
    uploaded = upload_test_document()
    if not uploaded:
        print("❌ Document upload failed")
        return False

    # Step 3: a processing timeout is reported but is not fatal
    processed = wait_for_processing()
    if not processed:
        print("⚠️ Processing timeout, but continuing with search...")

    # Step 4: the actual assertion of this script
    bee_found = test_bee_search()

    print("\n" + rule)
    print("📊 TEST RESULTS")
    print(rule)

    if bee_found:
        summary_lines = (
            "🎉 SUCCESS: Bee classification is now searchable!",
            " The enhanced document processor is working correctly.",
            " The Web UI should now detect bee classification.",
        )
    else:
        summary_lines = (
            "❌ ISSUE: Bee classification still not searchable",
            " There may be an issue with the enhanced processor",
            " or the image classification is not running.",
        )
    for line in summary_lines:
        print(line)

    print("\n💡 Next steps:")
    print(" 1. Open the Web UI at http://localhost:3015/webui")
    print(" 2. Search for 'bee' to verify classification appears")

    if not bee_found:
        print("\n❌ TEST FAILED: Further investigation needed")
        return False

    print("\n✅ TEST PASSED: Web UI should now detect bee classification")
    return True
|
|
|
|
if __name__ == "__main__":
    # Run the end-to-end check and print a closing line, then mirror the
    # outcome in the process exit status so shells and CI jobs can detect a
    # failed run (previously the script always exited with status 0).
    success = main()
    if success:
        print("\n🎉 The enhanced document processor is working correctly!")
    else:
        print("\n⚠️ The enhanced document processor needs investigation.")
    raise SystemExit(0 if success else 1)