# railseek6/final_webui_bee_test.py

import requests
import json

def _report_chunk(index, chunk):
    """Print whether one search-result chunk carries classification metadata.

    Args:
        index: 1-based position of the chunk in the search results.
        chunk: Mapping for a single result; its 'content' and 'file_path'
            keys are read.
    """
    # `or ''` also covers a key that is present but null in the JSON payload,
    # which would otherwise make the substring checks below raise TypeError.
    content = chunk.get('content') or ''
    file_path = chunk.get('file_path', '')
    print(f'\n📄 Chunk {index} from {file_path}:')

    # Check the FULL content for classification metadata
    if 'Image Classifications:' in content:
        print('✅ SUCCESS: Classification metadata IS present and searchable!')
        print('The bee classification is available through Web UI search.')
        # Show only the lines that belong to the classification section or
        # mention "bee" (case-insensitive).
        for line in content.split('\n'):
            if 'Image Classifications:' in line or 'bee' in line.lower():
                print(f'   🐝 {line}')
        return

    print('❌ ISSUE: Classification metadata is NOT present in indexed content')
    print('This means the document was processed WITHOUT the updated processor.')
    print('The root cause is that LightRAG is using its own document processor, not our updated one.')
    print('\nPossible solutions:')
    print('1. Modify LightRAG to use our custom processor')
    print('2. Pre-process documents externally and upload the processed text')
    print('3. Inject classification metadata into LightRAG\'s processing pipeline')

    # Show what IS in the content, truncated to keep the output readable.
    print('\n📝 Current content (first 500 chars):')
    preview = content[:500] + '...' if len(content) > 500 else content
    print(preview)


def final_webui_bee_test(base_url='http://localhost:3015', api_key='jleu1212'):
    """Check that "bee" search results expose image-classification metadata.

    Fetches ``{base_url}/documents`` and prints the result, then POSTs a
    ``bee`` query to ``{base_url}/api/search`` and reports, for each returned
    chunk, whether its content contains the ``Image Classifications:`` marker.
    All findings are printed; network and decoding failures are reported as
    messages instead of propagating as exceptions.

    Args:
        base_url: Root URL of the service under test.
        api_key: Value sent in the ``X-API-Key`` request header.
            NOTE(review): the default key is committed in source; consider
            supplying it from the environment instead.

    Returns:
        None.
    """
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}

    # Check if the bee classification is actually searchable in the Web UI
    print('🔍 Final verification of Web UI bee search...')

    # First, get current document status. A failure here is reported but does
    # not stop the run, matching the original handling of a non-200 status.
    try:
        response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f'Documents endpoint error: {exc}')
    else:
        if response.status_code == 200:
            try:
                print(f'Current documents: {response.json()}')
            except ValueError:
                # response.json() raises a ValueError subclass on a non-JSON body.
                print('Documents endpoint error: response body is not valid JSON')
        else:
            print(f'Documents endpoint error: {response.status_code}')

    # Search for bee and check if classification metadata is present
    print('\n🔎 Searching for "bee" in Web UI...')
    try:
        response = requests.post(
            f'{base_url}/api/search',
            headers=headers,
            json={'query': 'bee', 'top_k': 10},
            timeout=15,
        )
    except requests.RequestException as exc:
        # Connection refused, timeout, etc. -- nothing further can be checked.
        print(f'Search failed: {exc}')
        return

    if response.status_code != 200:
        print(f'Search failed: {response.status_code} - {response.text}')
        return

    try:
        results = response.json()
    except ValueError:
        print(f'Search failed: response body is not valid JSON - {response.text[:200]}')
        return

    if not isinstance(results, dict):
        print(f'Search failed: unexpected response type {type(results).__name__}')
        return

    # `or []` tolerates a "chunks" key that is present but null.
    chunks = results.get('chunks') or []
    print(f'Found {len(chunks)} chunks for "bee"')

    for position, chunk in enumerate(chunks, start=1):
        _report_chunk(position, chunk)

# Run the check only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    final_webui_bee_test()