Files
railseek6/final_webui_bee_test.py

58 lines
2.7 KiB
Python

import requests
import json
def final_webui_bee_test(base_url='http://localhost:3015', api_key='jleu1212'):
    """Verify that bee image-classification metadata is searchable via the Web UI.

    Prints the document status returned by ``GET {base_url}/documents``, then
    posts a search for "bee" to ``{base_url}/api/search`` and reports, for every
    returned chunk, whether its content contains an "Image Classifications:"
    section. Everything is written to stdout; nothing is returned.

    Network failures and non-JSON responses are reported on stdout instead of
    propagating, so a stopped server yields a readable message rather than a
    traceback.

    Args:
        base_url: Root URL of the server under test. A trailing slash is
            tolerated.
        api_key: Value sent in the ``X-API-Key`` header.
            NOTE(review): the default is a credential committed to source
            control; prefer passing the key in explicitly.
    """
    base_url = base_url.rstrip('/')
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}

    # Check if the bee classification is actually searchable in the Web UI
    print('🔍 Final verification of Web UI bee search...')

    # First, get current document status. This step is informational only, so
    # a failure is reported and the search below is still attempted.
    try:
        response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
        if response.status_code == 200:
            print(f'Current documents: {response.json()}')
        else:
            print(f'Documents endpoint error: {response.status_code}')
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a 200 response whose body is not valid JSON.
        print(f'Documents endpoint error: {exc}')

    # Search for bee and check if classification metadata is present
    print('\n🔎 Searching for "bee" in Web UI...')
    try:
        response = requests.post(
            f'{base_url}/api/search',
            headers=headers,
            json={'query': 'bee', 'top_k': 10},
            timeout=15,
        )
    except requests.RequestException as exc:
        print(f'Search failed: {exc}')
        return

    if response.status_code != 200:
        print(f'Search failed: {response.status_code} - {response.text}')
        return

    try:
        results = response.json()
    except ValueError as exc:
        print(f'Search failed: response is not valid JSON ({exc})')
        return

    # Treat a missing or null "chunks" entry (or a non-object payload) as
    # "no results" instead of crashing on it.
    chunks = (results.get('chunks') if isinstance(results, dict) else None) or []
    print(f'Found {len(chunks)} chunks for "bee"')

    for number, chunk in enumerate(chunks, start=1):
        # A null "content" value would otherwise break the substring checks.
        content = chunk.get('content') or ''
        file_path = chunk.get('file_path', '')
        print(f'\n📄 Chunk {number} from {file_path}:')

        # Check the FULL content for classification metadata
        if 'Image Classifications:' in content:
            print('✅ SUCCESS: Classification metadata IS present and searchable!')
            print('The bee classification is available through Web UI search.')
            # Extract and show the classification section
            for line in content.split('\n'):
                if 'Image Classifications:' in line or 'bee' in line.lower():
                    print(f' 🐝 {line}')
        else:
            print('❌ ISSUE: Classification metadata is NOT present in indexed content')
            print('This means the document was processed WITHOUT the updated processor.')
            print('The root cause is that LightRAG is using its own document processor, not our updated one.')
            print('\nPossible solutions:')
            print('1. Modify LightRAG to use our custom processor')
            print('2. Pre-process documents externally and upload the processed text')
            print('3. Inject classification metadata into LightRAG\'s processing pipeline')
            # Show what IS in the content
            print('\n📝 Current content (first 500 chars):')
            preview = (content[:500] + '...') if len(content) > 500 else content
            print(preview)
# Script entry point: run the Web UI verification only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    final_webui_bee_test()