58 lines
2.7 KiB
Python
58 lines
2.7 KiB
Python
import requests
|
|
import json
|
|
|
|
def _report_bee_chunk(position, chunk):
    """Print the diagnosis for a single search hit.

    Args:
        position: 1-based position of the chunk in the search results,
            used only for display.
        chunk: Mapping for one search hit; its 'content' and 'file_path'
            entries are read.

    Returns:
        True when the chunk's content contains the
        'Image Classifications:' marker, False otherwise.
    """
    # A JSON null for 'content' would break the substring test and the
    # slicing below, so treat a missing/null value as empty text.
    content = chunk.get('content') or ''
    file_path = chunk.get('file_path', '')
    print(f'\n📄 Chunk {position} from {file_path}:')

    # Check the FULL content for classification metadata.
    if 'Image Classifications:' in content:
        print('✅ SUCCESS: Classification metadata IS present and searchable!')
        print('The bee classification is available through Web UI search.')
        # Show the marker line plus every line mentioning "bee".
        for line in content.split('\n'):
            if 'Image Classifications:' in line or 'bee' in line.lower():
                print(f' 🐝 {line}')
        return True

    print('❌ ISSUE: Classification metadata is NOT present in indexed content')
    print('This means the document was processed WITHOUT the updated processor.')
    print('The root cause is that LightRAG is using its own document processor, not our updated one.')
    print('\nPossible solutions:')
    print('1. Modify LightRAG to use our custom processor')
    print('2. Pre-process documents externally and upload the processed text')
    print('3. Inject classification metadata into LightRAG\'s processing pipeline')

    # Show what IS in the content so the failure can be diagnosed.
    print('\n📝 Current content (first 500 chars):')
    print(content[:500] + '...' if len(content) > 500 else content)
    return False


def final_webui_bee_test(base_url='http://localhost:3015', api_key='jleu1212'):
    """Check whether the "bee" classification metadata is searchable.

    Prints the result of GET ``{base_url}/documents``, then POSTs a search
    for "bee" to ``{base_url}/api/search`` and reports, for every returned
    chunk, whether its content contains the 'Image Classifications:' marker.

    Network failures and malformed responses are reported on stdout
    instead of propagating as exceptions.

    Args:
        base_url: Root URL of the server under test.
        api_key: Value sent in the 'X-API-Key' request header.
            NOTE(review): the default is a credential committed to source;
            consider supplying it from the environment instead.

    Returns:
        True if at least one returned chunk contains the classification
        marker, False otherwise (including when the search request fails).
    """
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}

    print('🔍 Final verification of Web UI bee search...')

    # First, get current document status. This step is informational only,
    # so a failure here is reported and the search is still attempted.
    try:
        response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f'Documents endpoint error: {exc}')
    else:
        if response.status_code == 200:
            try:
                print(f'Current documents: {response.json()}')
            except ValueError:
                # Response.json() raises a ValueError subclass on a non-JSON body.
                print('Documents endpoint error: response body is not valid JSON')
        else:
            print(f'Documents endpoint error: {response.status_code}')

    # Search for bee and check if classification metadata is present.
    print('\n🔎 Searching for "bee" in Web UI...')
    try:
        response = requests.post(
            f'{base_url}/api/search',
            headers=headers,
            json={'query': 'bee', 'top_k': 10},
            timeout=15,
        )
    except requests.RequestException as exc:
        print(f'Search failed: {exc}')
        return False

    if response.status_code != 200:
        print(f'Search failed: {response.status_code} - {response.text}')
        return False

    try:
        results = response.json()
    except ValueError:
        print(f'Search failed: response body is not valid JSON - {response.text}')
        return False

    # Tolerate a non-object payload or a null 'chunks' entry.
    chunks = results.get('chunks') if isinstance(results, dict) else None
    chunks = chunks or []
    print(f'Found {len(chunks)} chunks for "bee"')

    found = False
    for position, chunk in enumerate(chunks, start=1):
        if not isinstance(chunk, dict):
            print(f'\n📄 Chunk {position}: skipping unexpected entry {chunk!r}')
            continue
        if _report_bee_chunk(position, chunk):
            found = True
    return found
|
|
|
|
# Script entry point: run the verification only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    final_webui_bee_test()