# File: railseek6/simple_bee_test.py
# Listing metadata from the file viewer: 179 lines, 6.3 KiB, Python

"""
Simple Test for Bee Classification in Document Processing
Tests if the enhanced document processor is working without API dependencies
"""
import requests
import time
import os
# Configuration
# Both settings can be overridden through environment variables so the script
# can target another server or credential without editing the source. The
# fallbacks are the values this script has always used.
# A trailing slash is stripped because every request builds its URL as
# f"{LIGHTRAG_URL}/...".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015").rstrip("/")
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
# Header carrying the API key; passed to every request made by this script.
HEADERS = {"X-API-Key": API_KEY}
def check_server_status():
    """Probe the server root URL and report whether it answered with HTTP 200.

    Returns:
        True when the GET request completes with status code 200; False for
        any other status code or when the request raises.
    """
    print("🔍 CHECKING SERVER STATUS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
        # Guard clause: anything other than 200 counts as "not running".
        if reply.status_code != 200:
            print(f"❌ Server status: {reply.status_code}")
            return False
        print("✅ Server is running")
        return True
    except Exception as err:
        # Connection errors, timeouts, etc. are reported, never propagated.
        print(f"❌ Server not accessible: {err}")
        return False
def check_documents():
    """Fetch the document listing from the server and print one line per entry.

    Returns:
        The decoded JSON body of ``GET /documents`` when the server answers
        with status 200; an empty list for any other status code or when
        anything in the request/printing path raises.
    """
    print("📄 CHECKING DOCUMENTS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if reply.status_code != 200:
            print(f"❌ Failed to get documents: {reply.status_code}")
            return []

        listing = reply.json()
        print(f"📊 Found {len(listing)} documents:")
        for entry in listing:
            name = entry.get('filename', 'Unknown')
            state = entry.get('status', 'unknown')
            print(f" - {name}: {state}")
        return listing
    except Exception as err:
        print(f"❌ Error checking documents: {err}")
        return []
def test_simple_search():
    """Post a few one-word queries to the search endpoint and print the hits.

    Each query is handled independently: a failure (non-200 status or an
    exception) is printed for that query and the next one is still tried.
    Nothing is returned.
    """
    print("🔍 TESTING SIMPLE SEARCH...")
    # Plain terms that might appear in the uploaded document.
    for term in ("test", "document", "text"):
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={"query": term, "top_k": 5, "mode": "standard"},
                headers=HEADERS,
                timeout=10,
            )
            if reply.status_code != 200:
                print(f"'{term}' search failed: {reply.status_code}")
                continue

            hits = reply.json().get('results')
            if not hits:
                print(f"'{term}': No results")
                continue

            print(f"'{term}': Found {len(hits)} results")
            for hit in hits:
                # Only the first 100 characters of each hit are shown.
                snippet = hit.get('content', '')[:100]
                relevance = hit.get('score', 0)
                print(f" Score {relevance:.4f}: {snippet}...")
        except Exception as err:
            print(f"'{term}' search error: {err}")
def check_document_content():
    """Fetch the stored content of test.docx and look for the text "bee".

    Lists the documents, selects entries whose filename contains
    ``test.docx`` (case-insensitive), requests
    ``/documents/{id}/content`` for each, and inspects the first content
    that is successfully retrieved.

    Returns:
        True when retrieved content contains "bee" (case-insensitive).
        False when retrieved content does not contain it, when the listing
        or every content request fails, when no test.docx entry exists, or
        when any unexpected error occurs.
    """
    print("📝 CHECKING DOCUMENT CONTENT...")
    try:
        # Get documents first
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            # Report the failure instead of returning False silently.
            print(f"❌ Failed to get documents: {response.status_code}")
            return False

        found_candidate = False
        for doc in response.json():
            # `or ''` tolerates a null/missing filename; without it a single
            # such entry raises on .lower() and aborts the whole scan.
            filename = doc.get('filename') or ''
            if 'test.docx' not in filename.lower():
                continue

            found_candidate = True
            doc_id = doc.get('id')
            print(f"📄 Found test.docx with ID: {doc_id}")

            # Try to get document content; a failure here is reported and the
            # scan moves on to the next matching entry.
            try:
                content_response = requests.get(
                    f"{LIGHTRAG_URL}/documents/{doc_id}/content",
                    headers=HEADERS,
                    timeout=10
                )
                if content_response.status_code != 200:
                    print(f"❌ Could not get content: {content_response.status_code}")
                    continue

                content = content_response.text
                print("✅ Document content preview (first 500 chars):")
                print(f" {content[:500]}...")

                # Check for bee-related content.
                # NOTE(review): plain substring match, so words such as
                # "been" also count; kept because the exact label format
                # written by the processor is not visible here.
                if 'bee' in content.lower():
                    print("🎯 BEE CLASSIFICATION FOUND IN CONTENT!")
                    return True

                print("❌ No bee classification found in content")
                return False
            except Exception as e:
                print(f"❌ Error getting content: {e}")

        if not found_candidate:
            print("❌ test.docx not found in document list")
        return False
    except Exception as e:
        print(f"❌ Error checking document content: {e}")
        return False
def main():
    """Run the full check sequence and print a summary.

    Steps: verify the server answers, list the documents, look for "bee" in
    the content of test.docx, then run the simple search queries.

    Returns:
        True when the bee check succeeded; False when the server is not
        reachable or the bee check did not succeed.
    """
    print("🧪 SIMPLE BEE CLASSIFICATION TEST")
    print("=" * 60)

    # Step 1: Check server status
    if not check_server_status():
        print("❌ Cannot proceed - server not running")
        return False

    # Step 2: Check current documents (called for its printed listing only;
    # the returned list is not needed here)
    check_documents()

    # Step 3: Check if test.docx exists and get its content
    bee_found = check_document_content()

    # Step 4: Test simple search
    test_simple_search()

    print("\n" + "=" * 60)
    print("📊 TEST RESULTS")
    print("=" * 60)

    if bee_found:
        print("🎉 SUCCESS: Bee classification found in document content!")
        print(" The enhanced document processor is working correctly.")
        print("\n✅ TEST PASSED: Bee classification is present in document")
        return True

    print("❌ ISSUE: Bee classification not found in document content")
    print(" The enhanced processor may not be active or bee not detected")
    print("\n💡 Next steps:")
    print(" 1. Check server logs for processing details")
    print(" 2. Verify the enhanced document processor is being used")
    print(" 3. Check if OpenCLIP classifier is available")
    print("\n❌ TEST FAILED: Bee classification not found")
    return False
if __name__ == "__main__":
    # Run the checks, then close with a one-line verdict.
    success = main()
    print(
        "\n🎉 The bee classification system is working!"
        if success
        else "\n⚠️ Further investigation needed for bee classification."
    )