Files
railseek6/fix_webui_search.py

131 lines
5.7 KiB
Python

import requests
import time
import os
# MIME type sent with the upload; the test document is a .docx file.
_DOCX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'


def _print_matching_lines(text, prefix):
    """Print every line of *text* that mentions the classification header or "bee".

    Args:
        text: Multi-line string to scan.
        prefix: String written in front of each matching line.
    """
    for line in text.split('\n'):
        if 'Image Classifications:' in line or 'bee' in line.lower():
            print(f'{prefix}{line}')


def _delete_processed_documents(base_url, headers):
    """Best-effort removal of the documents the server lists as processed.

    Reads ``statuses.processed`` from ``GET {base_url}/documents`` and issues a
    ``DELETE`` for every entry that carries an ``id``. Failures are printed and
    never propagate, so the caller always continues with the next step.

    Args:
        base_url: Root URL of the server, without a trailing slash.
        headers: HTTP headers sent with every request.
    """
    try:
        response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
        if response.status_code != 200:
            # Previously this case was skipped without any output.
            print(f'Could not list documents: {response.status_code}')
            return

        data = response.json()
        processed_docs = data.get('statuses', {}).get('processed', [])
        print(f'Found {len(processed_docs)} processed documents')

        for doc in processed_docs:
            doc_id = doc.get('id')
            if not doc_id:
                continue
            print(f'Deleting document: {doc_id}')
            delete_response = requests.delete(
                f'{base_url}/documents/{doc_id}', headers=headers, timeout=10
            )
            print(f'Delete status: {delete_response.status_code}')

        # Wait for deletion to complete before anything is re-uploaded.
        time.sleep(3)
    except Exception as e:
        print(f'Delete operation failed: {e}')


def _process_document(test_file):
    """Run the local processor on *test_file* and check for classification text.

    Args:
        test_file: Path of the document to process.

    Returns:
        True when processing succeeds and the extracted text contains the
        ``Image Classifications:`` marker; False otherwise (including when the
        file is missing or the processor cannot be imported).
    """
    try:
        # Imported lazily so a missing project module is reported as a failed
        # step instead of breaking the import of this script.
        from optimized_document_processor import OptimizedDocumentProcessor
        import asyncio

        processor = OptimizedDocumentProcessor()
        if not os.path.exists(test_file):
            print(f'❌ Test file not found: {test_file}')
            return False

        print(f'Processing {test_file} with classification metadata...')
        result = asyncio.run(processor.process_document(test_file))
        if not result["success"]:
            print(f'❌ Processing failed: {result["metadata"].get("error", "Unknown error")}')
            return False

        text_content = result["text_content"]
        print('✅ Document processed successfully with classification metadata')
        print(f'Text content length: {len(text_content)} chars')

        if 'Image Classifications:' not in text_content:
            print('❌ Classification metadata NOT included in text content')
            return False

        print('✅ Classification metadata IS included in text content')
        _print_matching_lines(text_content, ' ')
        return True
    except Exception as e:
        print(f'❌ Document processing failed: {e}')
        return False


def _upload_document(base_url, api_key, test_file):
    """Upload *test_file* to ``{base_url}/documents/upload`` as multipart form data.

    Args:
        base_url: Root URL of the server, without a trailing slash.
        api_key: Value sent in the ``X-API-Key`` header.
        test_file: Path of the document to upload.

    Returns:
        True when the server answers with HTTP 200, False otherwise.
    """
    try:
        # The context manager closes the file even when the request raises;
        # the original left the handle open.
        with open(test_file, 'rb') as handle:
            files = {'file': (os.path.basename(test_file), handle, _DOCX_MIME_TYPE)}
            # Only the API key is sent here: the JSON Content-Type header is
            # left out so the multipart content type can be set for the upload.
            upload_response = requests.post(
                f'{base_url}/documents/upload',
                files=files,
                headers={'X-API-Key': api_key},
                timeout=30,
            )

        if upload_response.status_code != 200:
            print(f'❌ Upload failed: {upload_response.status_code} - {upload_response.text}')
            return False

        print('✅ Document uploaded successfully')
        upload_data = upload_response.json()
        print(f'Upload response: {upload_data}')
        return True
    except Exception as e:
        print(f'❌ Upload failed: {e}')
        return False


def _verify_bee_search(base_url, headers):
    """Search for "bee" and report whether any chunk carries classification text.

    Args:
        base_url: Root URL of the server, without a trailing slash.
        headers: HTTP headers sent with the search request.

    Returns:
        True as soon as one returned chunk contains ``Image Classifications:``;
        False when the request fails, no chunk matches, or no chunks come back.
    """
    try:
        response = requests.post(
            f'{base_url}/api/search',
            headers=headers,
            json={'query': 'bee', 'top_k': 10},
            timeout=15,
        )
        if response.status_code != 200:
            print(f'❌ Search failed: {response.status_code}')
            return False

        results = response.json()
        chunks = results.get('chunks', [])
        print(f'Found {len(chunks)} chunks for "bee" search')

        for position, chunk in enumerate(chunks, start=1):
            # `or ''` guards against a chunk whose content is explicitly null.
            content = chunk.get('content') or ''
            file_path = chunk.get('file_path', '')
            print(f'\n📄 Chunk {position} from {file_path}:')

            if 'Image Classifications:' not in content:
                # Keep looking: the marker may live in a later chunk.
                continue

            print('✅ SUCCESS: Classification metadata IS present and searchable!')
            print('The bee classification is now available through Web UI search.')
            _print_matching_lines(content, ' 🐝 ')
            return True

        # Reached only when every chunk was inspected (or none were returned).
        print('❌ Still no classification metadata in indexed content')
        return False
    except Exception as e:
        print(f'❌ Search verification failed: {e}')
        return False


def fix_webui_search(base_url='http://localhost:3015', api_key='jleu1212'):
    """Re-index the test document and confirm "bee" is searchable via the API.

    Runs five steps in order: delete the documents listed as processed,
    process ``test.docx`` locally and check that classification text was
    extracted, upload the file, wait 20 seconds for indexing, then query the
    search endpoint for "bee". Progress is reported with ``print``.

    Args:
        base_url: Root URL of the server, without a trailing slash.
        api_key: Value sent in the ``X-API-Key`` header.
            NOTE(review): the default embeds a credential in source; prefer
            passing the key in from the caller's configuration.

    Returns:
        True when the search step finds a chunk containing
        ``Image Classifications:``; False when any step after the deletion
        fails. The deletion step is best-effort and never aborts the run.
    """
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}
    test_file = "test.docx"

    print('🔧 Fixing Web UI search for bee classification...')

    # Step 1: Delete all existing documents to start fresh
    print('🗑️ Step 1: Deleting all existing documents...')
    _delete_processed_documents(base_url, headers)

    # Step 2: Process the document with the updated processor to include classification
    print('\n📄 Step 2: Processing document with updated processor...')
    if not _process_document(test_file):
        return False

    # Step 3: Upload the processed document to LightRAG
    print('\n📤 Step 3: Uploading document to LightRAG...')
    if not _upload_document(base_url, api_key, test_file):
        return False

    # Step 4: Wait for indexing before querying
    print('\n⏳ Step 4: Waiting for indexing (20 seconds)...')
    time.sleep(20)

    # Step 5: Verify search for bee classification
    print('\n🔍 Step 5: Verifying bee classification search...')
    return _verify_bee_search(base_url, headers)
if __name__ == '__main__':
    # Script entry point: run the workflow once and print a single verdict line.
    success = fix_webui_search()
    # A falsy result is reported as remaining issues.
    print(
        '\n🎉 SUCCESS: Bee classification is now searchable in Web UI!'
        if success
        else '\n⚠️ Some issues remain. The classification metadata may not be properly indexed.'
    )