179 lines
6.3 KiB
Python
179 lines
6.3 KiB
Python
"""
Simple Test for Bee Classification in Document Processing

Tests if the enhanced document processor is working without API dependencies
"""
|
|
|
|
import requests
|
|
import time
|
|
import os
|
|
|
|
# Configuration
# The server URL and API key may be overridden through the LIGHTRAG_URL and
# LIGHTRAG_API_KEY environment variables; the fallbacks are the values this
# script has always used, so running it unconfigured behaves as before.
# A trailing slash is stripped because every request appends its own "/path".
LIGHTRAG_URL = (os.environ.get("LIGHTRAG_URL") or "http://localhost:3015").rstrip("/")
# NOTE(review): the fallback key is a credential committed in source; prefer
# supplying LIGHTRAG_API_KEY from the environment.
API_KEY = os.environ.get("LIGHTRAG_API_KEY") or "jleu1212"
# Sent with every request issued by this script.
HEADERS = {"X-API-Key": API_KEY}
|
|
|
|
def check_server_status():
    """Report whether the server answers a GET on its root URL with HTTP 200.

    Prints a status line for each outcome.

    Returns:
        True when the response status is 200; False for any other status or
        when the request (or anything else in the check) raises.
    """
    print("🔍 CHECKING SERVER STATUS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
        # Guard clause: anything other than 200 counts as "not running".
        if reply.status_code != 200:
            print(f"❌ Server status: {reply.status_code}")
            return False
        print("✅ Server is running")
        return True
    except Exception as exc:
        print(f"❌ Server not accessible: {exc}")
        return False
|
|
|
|
def check_documents():
    """List the documents reported by the server's ``/documents`` endpoint.

    Prints the number of entries followed by one line per entry showing its
    ``filename`` and ``status`` fields (with fallbacks when a field is absent).

    Returns:
        The decoded JSON payload on HTTP 200; an empty list for any other
        status or when an exception is raised along the way.
    """
    print("📄 CHECKING DOCUMENTS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if reply.status_code != 200:
            print(f"❌ Failed to get documents: {reply.status_code}")
            return []

        listing = reply.json()
        print(f"📊 Found {len(listing)} documents:")
        for entry in listing:
            name = entry.get('filename', 'Unknown')
            state = entry.get('status', 'unknown')
            print(f" - {name}: {state}")
        return listing
    except Exception as exc:
        print(f"❌ Error checking documents: {exc}")
        return []
|
|
|
|
def test_simple_search():
    """Run a few one-word queries against ``/search`` and print what comes back.

    Each query is posted with ``top_k`` 5 and ``mode`` "standard". For every
    query the outcome is printed: the hits with score and a 100-character
    content preview, a "No results" line, the failing HTTP status, or the
    exception text. A failure on one query does not stop the remaining ones.
    Nothing is returned.
    """
    print("🔍 TESTING SIMPLE SEARCH...")

    # Simple terms that might be in the document.
    for term in ("test", "document", "text"):
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={"query": term, "top_k": 5, "mode": "standard"},
                headers=HEADERS,
                timeout=10,
            )
            if reply.status_code != 200:
                print(f"❌ '{term}' search failed: {reply.status_code}")
                continue

            hits = reply.json().get('results')
            if not hits:
                print(f"❌ '{term}': No results")
                continue

            print(f"✅ '{term}': Found {len(hits)} results")
            for hit in hits:
                snippet = hit.get('content', '')[:100]
                relevance = hit.get('score', 0)
                print(f" Score {relevance:.4f}: {snippet}...")
        except Exception as exc:
            print(f"❌ '{term}' search error: {exc}")
|
|
|
|
def _mentions_bee(text):
    """Return True when *text* contains "bee" or "bees" not followed by a letter.

    A bare substring test would also accept "been", "beef", "beer" or
    "beetle"; the negative lookahead rejects those while still accepting
    forms such as "bee", "bees", "bee_1", "bee:" and "honeybee".
    """
    import re  # function-scope import: this helper is the only user

    return re.search(r"bees?(?![a-z])", text.lower()) is not None


def check_document_content():
    """Fetch the content of test.docx and report whether it mentions a bee.

    Lists ``/documents``, and for each entry whose filename contains
    "test.docx" (case-insensitive) requests ``/documents/<id>/content``. The
    first content that is fetched successfully decides the result.

    Returns:
        True when the fetched content mentions a bee (see ``_mentions_bee``);
        False when it does not, when the listing request does not return 200,
        when no matching document yields content, or when an exception is
        raised.
    """
    print("📝 CHECKING DOCUMENT CONTENT...")

    try:
        # Get documents first
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            # Say why the check is giving up instead of failing silently.
            print(f"❌ Failed to get documents: {response.status_code}")
            return False

        for doc in response.json():
            # A missing or null filename is treated as "not the test document".
            filename = doc.get('filename') or ''
            if 'test.docx' not in filename.lower():
                continue

            doc_id = doc.get('id')
            print(f"📄 Found test.docx with ID: {doc_id}")

            # Try to get document content; a failure here is reported and the
            # scan moves on to the next matching entry.
            try:
                content_response = requests.get(
                    f"{LIGHTRAG_URL}/documents/{doc_id}/content",
                    headers=HEADERS,
                    timeout=10,
                )
                if content_response.status_code != 200:
                    print(f"❌ Could not get content: {content_response.status_code}")
                    continue

                content = content_response.text
                print("✅ Document content preview (first 500 chars):")
                print(f" {content[:500]}...")

                # Check for bee-related content
                if _mentions_bee(content):
                    print("🎯 BEE CLASSIFICATION FOUND IN CONTENT!")
                    return True
                print("❌ No bee classification found in content")
                return False
            except Exception as e:
                print(f"❌ Error getting content: {e}")

        return False
    except Exception as e:
        print(f"❌ Error checking document content: {e}")
        return False
|
|
|
|
def main():
    """Run the bee-classification smoke test end to end and print a verdict.

    Steps: confirm the server responds, list the stored documents, look for
    bee-related text in test.docx, then run a few simple searches. The
    document listing and the searches are informational; only the content
    check decides the outcome.

    Returns:
        True when check_document_content() reports bee-related content;
        False otherwise, including when the server is not reachable.
    """
    print("🧪 SIMPLE BEE CLASSIFICATION TEST")
    print("=" * 60)

    # Step 1: Check server status - nothing else can run without it.
    if not check_server_status():
        print("❌ Cannot proceed - server not running")
        return False

    # Step 2: Check current documents (called for its printed listing only).
    check_documents()

    # Step 3: Check if test.docx exists and get its content
    bee_found = check_document_content()

    # Step 4: Test simple search
    test_simple_search()

    print("\n" + "=" * 60)
    print("📊 TEST RESULTS")
    print("=" * 60)

    if bee_found:
        print("🎉 SUCCESS: Bee classification found in document content!")
        print(" The enhanced document processor is working correctly.")
        print("\n✅ TEST PASSED: Bee classification is present in document")
        return True

    print("❌ ISSUE: Bee classification not found in document content")
    print(" The enhanced processor may not be active or bee not detected")

    # Troubleshooting hints are only useful when detection failed.
    print("\n💡 Next steps:")
    print(" 1. Check server logs for processing details")
    print(" 2. Verify the enhanced document processor is being used")
    print(" 3. Check if OpenCLIP classifier is available")

    print("\n❌ TEST FAILED: Bee classification not found")
    return False
|
|
|
|
if __name__ == "__main__":
    # Run the whole check and close with a line that mirrors main()'s verdict.
    closing_line = (
        "\n🎉 The bee classification system is working!"
        if main()
        else "\n⚠️ Further investigation needed for bee classification."
    )
    print(closing_line)