# railseek6/simple_bee_test.py

"""
Simple Test for Bee Classification in Document Processing
Tests if the enhanced document processor is working without API dependencies
"""

import requests
import time
import os

# Configuration
# Both values can be overridden through the environment so the script can be
# pointed at another server or key without editing the file. The defaults are
# the values this script has always used.
# A trailing slash is stripped because request paths are appended as "/...".
LIGHTRAG_URL = os.environ.get("LIGHTRAG_URL", "http://localhost:3015").rstrip("/")
# NOTE(review): a credential is hard-coded as the fallback; prefer supplying
# LIGHTRAG_API_KEY from the environment.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
# Header sent with every request made by this script.
HEADERS = {"X-API-Key": API_KEY}

def check_server_status():
    """Probe the server root URL and report whether it answered with HTTP 200.

    Returns:
        True when the root endpoint responds with status 200; False for any
        other status code or when the request raises.
    """
    print("🔍 CHECKING SERVER STATUS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/", headers=HEADERS, timeout=5)
        is_up = reply.status_code == 200
        print("✅ Server is running" if is_up else f"❌ Server status: {reply.status_code}")
        return is_up
    except Exception as err:
        # Any failure (connection refused, timeout, ...) counts as "not running".
        print(f"❌ Server not accessible: {err}")
        return False

def check_documents():
    """List the documents reported by the server's /documents endpoint.

    Prints one line per document with its filename and status.

    Returns:
        The decoded JSON payload on HTTP 200; an empty list on any other
        status code or when an exception is raised.
    """
    print("📄 CHECKING DOCUMENTS...")
    try:
        reply = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if reply.status_code != 200:
            print(f"❌ Failed to get documents: {reply.status_code}")
            return []

        listing = reply.json()
        print(f"📊 Found {len(listing)} documents:")
        for entry in listing:
            name = entry.get('filename', 'Unknown')
            state = entry.get('status', 'unknown')
            print(f"   - {name}: {state}")
        return listing
    except Exception as err:
        print(f"❌ Error checking documents: {err}")
        return []

def test_simple_search():
    """Run a few one-word queries against /search and print what comes back.

    Each query is independent: a failure on one term is reported and the
    remaining terms are still attempted. Nothing is returned.
    """
    print("🔍 TESTING SIMPLE SEARCH...")

    # Generic terms that might plausibly occur in the indexed document.
    for term in ("test", "document", "text"):
        try:
            reply = requests.post(
                f"{LIGHTRAG_URL}/search",
                json={"query": term, "top_k": 5, "mode": "standard"},
                headers=HEADERS,
                timeout=10,
            )

            if reply.status_code != 200:
                print(f"❌ '{term}' search failed: {reply.status_code}")
                continue

            hits = reply.json().get('results')
            if not hits:
                print(f"❌ '{term}': No results")
                continue

            print(f"✅ '{term}': Found {len(hits)} results")
            for hit in hits:
                snippet = hit.get('content', '')[:100]
                relevance = hit.get('score', 0)
                print(f"   Score {relevance:.4f}: {snippet}...")

        except Exception as err:
            print(f"❌ '{term}' search error: {err}")

def check_document_content():
    """Fetch the content of test.docx and look for a bee mention in it.

    The document list is requested from /documents; for each entry whose
    filename contains "test.docx" (case-insensitive) the content is requested
    from /documents/<id>/content. The first entry whose content can be fetched
    decides the result.

    Returns:
        True when the fetched content mentions a bee as a whole word
        ("bee", "bees", "honeybee", "bumblebee", any letter case). False when
        it does not, when no matching document's content could be retrieved,
        or when an error occurs.
    """
    import re  # local import so this function does not depend on file-level imports

    print("📝 CHECKING DOCUMENT CONTENT...")

    # Match whole words only: a plain substring test would also fire on
    # unrelated words such as "been", "beef" or "beer".
    bee_pattern = re.compile(r"\b(?:honey|bumble)?bees?\b", re.IGNORECASE)

    try:
        # Get documents first
        response = requests.get(f"{LIGHTRAG_URL}/documents", headers=HEADERS, timeout=10)
        if response.status_code != 200:
            print(f"❌ Failed to get documents: {response.status_code}")
            return False

        found_candidate = False
        for doc in response.json():
            # `or ''` guards against a filename that is present but null.
            filename = doc.get('filename') or ''
            if 'test.docx' not in filename.lower():
                continue

            found_candidate = True
            doc_id = doc.get('id')
            print(f"📄 Found test.docx with ID: {doc_id}")
            if doc_id is None:
                # Without an ID the content URL cannot be built.
                print("❌ Document has no ID; cannot fetch content")
                continue

            # Try to get document content; a failure here only skips this
            # entry so that a later matching entry can still be checked.
            try:
                content_response = requests.get(
                    f"{LIGHTRAG_URL}/documents/{doc_id}/content",
                    headers=HEADERS,
                    timeout=10
                )
            except Exception as e:
                print(f"❌ Error getting content: {e}")
                continue

            if content_response.status_code != 200:
                print(f"❌ Could not get content: {content_response.status_code}")
                continue

            content = content_response.text
            print("✅ Document content preview (first 500 chars):")
            print(f"   {content[:500]}...")

            # Check for bee-related content
            if bee_pattern.search(content):
                print("🎯 BEE CLASSIFICATION FOUND IN CONTENT!")
                return True
            print("❌ No bee classification found in content")
            return False

        if not found_candidate:
            print("❌ test.docx not found in document list")
        return False
    except Exception as e:
        print(f"❌ Error checking document content: {e}")
        return False

def main():
    """Run every check in sequence and print a summary.

    Returns:
        False immediately when the server is not reachable; otherwise True
        when the document content check found a bee mention, else False.
    """
    rule = "=" * 60
    print("🧪 SIMPLE BEE CLASSIFICATION TEST")
    print(rule)

    # Step 1: nothing else can run without a reachable server.
    if not check_server_status():
        print("❌ Cannot proceed - server not running")
        return False

    # Step 2: list the current documents (output only; result not needed here).
    check_documents()

    # Step 3: look for test.docx and inspect its content.
    bee_found = check_document_content()

    # Step 4: exercise the search endpoint.
    test_simple_search()

    print("\n" + rule)
    print("📊 TEST RESULTS")
    print(rule)

    if bee_found:
        outcome_lines = (
            "🎉 SUCCESS: Bee classification found in document content!",
            "   The enhanced document processor is working correctly.",
        )
    else:
        outcome_lines = (
            "❌ ISSUE: Bee classification not found in document content",
            "   The enhanced processor may not be active or bee not detected",
        )
    for line in outcome_lines:
        print(line)

    print("\n💡 Next steps:")
    for hint in (
        "   1. Check server logs for processing details",
        "   2. Verify the enhanced document processor is being used",
        "   3. Check if OpenCLIP classifier is available",
    ):
        print(hint)

    if bee_found:
        print("\n✅ TEST PASSED: Bee classification is present in document")
    else:
        print("\n❌ TEST FAILED: Bee classification not found")
    return bool(bee_found)

if __name__ == "__main__":
    success = main()
    if success:
        print("\n🎉 The bee classification system is working!")
    else:
        print("\n⚠️  Further investigation needed for bee classification.")
    # Propagate the outcome as the process exit status so shells and CI can
    # tell a failed run from a successful one (0 = passed, 1 = failed).
    raise SystemExit(0 if success else 1)