"""Re-index the test document so its image-classification text is searchable.

The script wipes the documents the server reports as processed, re-processes
the test file locally, uploads it again, waits for indexing, and finally runs
a "bee" search to confirm the classification text made it into the index.
"""
import asyncio
import os
import time

import requests

# NOTE(review): the leading symbols in the status messages below look like
# mis-encoded emoji. They are runtime output and are left exactly as found.

# Marker the processor is expected to write ahead of the classification text.
CLASSIFICATION_MARKER = 'Image Classifications:'
DOCX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'


def _print_classification_lines(text, prefix):
    """Print each line of *text* that holds the marker or mentions 'bee'."""
    for line in text.split('\n'):
        if CLASSIFICATION_MARKER in line or 'bee' in line.lower():
            print(f'{prefix}{line}')


def _delete_processed_documents(base_url, headers):
    """Best-effort deletion of every document listed as processed.

    Failures are reported and swallowed on purpose: a failed clean-up should
    not stop the remaining steps from running.
    """
    try:
        response = requests.get(f'{base_url}/documents', headers=headers, timeout=10)
        if response.status_code != 200:
            return

        processed_docs = response.json().get('statuses', {}).get('processed', [])
        print(f'Found {len(processed_docs)} processed documents')

        for doc in processed_docs:
            doc_id = doc.get('id')
            if not doc_id:
                continue
            print(f'Deleting document: {doc_id}')
            delete_response = requests.delete(
                f'{base_url}/documents/{doc_id}', headers=headers, timeout=10
            )
            print(f'Delete status: {delete_response.status_code}')

        # Give the server a moment to finish the deletions.
        time.sleep(3)
    except Exception as e:
        print(f'Delete operation failed: {e}')


def _process_document(test_file):
    """Run the local processor and confirm its text holds the marker."""
    try:
        # Imported lazily so a missing project module is reported like any
        # other processing failure instead of crashing at import time.
        from optimized_document_processor import OptimizedDocumentProcessor

        processor = OptimizedDocumentProcessor()

        if not os.path.exists(test_file):
            print(f'āŒ Test file not found: {test_file}')
            return False

        print(f'Processing {test_file} with classification metadata...')
        result = asyncio.run(processor.process_document(test_file))

        if not result["success"]:
            print(f'āŒ Processing failed: {result["metadata"].get("error", "Unknown error")}')
            return False

        text_content = result["text_content"]
        print('āœ… Document processed successfully with classification metadata')
        print(f'Text content length: {len(text_content)} chars')

        if CLASSIFICATION_MARKER not in text_content:
            print('āŒ Classification metadata NOT included in text content')
            return False

        print('āœ… Classification metadata IS included in text content')
        _print_classification_lines(text_content, ' ')
        return True
    except Exception as e:
        print(f'āŒ Document processing failed: {e}')
        return False


def _upload_document(base_url, api_key, test_file):
    """Upload *test_file* as multipart form data; return True on HTTP 200."""
    try:
        # The context manager closes the file even when the request raises.
        with open(test_file, 'rb') as handle:
            files = {'file': (os.path.basename(test_file), handle, DOCX_MIME_TYPE)}
            # No JSON Content-Type here: requests must set the multipart
            # boundary header itself.
            upload_response = requests.post(
                f'{base_url}/documents/upload',
                files=files,
                headers={'X-API-Key': api_key},
                timeout=30,
            )

        if upload_response.status_code != 200:
            print(f'āŒ Upload failed: {upload_response.status_code} - {upload_response.text}')
            return False

        print('āœ… Document uploaded successfully')
        upload_data = upload_response.json()
        print(f'Upload response: {upload_data}')
        return True
    except Exception as e:
        print(f'āŒ Upload failed: {e}')
        return False


def _verify_search(base_url, headers):
    """Search for 'bee' and report whether any chunk holds the marker."""
    try:
        response = requests.post(
            f'{base_url}/api/search',
            headers=headers,
            json={'query': 'bee', 'top_k': 10},
            timeout=15,
        )
        if response.status_code != 200:
            print(f'āŒ Search failed: {response.status_code}')
            return False

        chunks = response.json().get('chunks', [])
        print(f'Found {len(chunks)} chunks for "bee" search')

        # Look at every chunk: a hit further down the list still counts, and
        # an empty result falls through to the explicit failure below.
        for index, chunk in enumerate(chunks, start=1):
            content = chunk.get('content', '')
            file_path = chunk.get('file_path', '')
            print(f'\nšŸ“„ Chunk {index} from {file_path}:')

            if CLASSIFICATION_MARKER in content:
                print('āœ… SUCCESS: Classification metadata IS present and searchable!')
                print('The bee classification is now available through Web UI search.')
                _print_classification_lines(content, ' šŸ ')
                return True

        print('āŒ Still no classification metadata in indexed content')
        return False
    except Exception as e:
        print(f'āŒ Search verification failed: {e}')
        return False


def fix_webui_search(base_url='http://localhost:3015', api_key='jleu1212',
                     test_file='test.docx'):
    """Rebuild the index entry for *test_file* and verify it is searchable.

    Args:
        base_url: Root URL of the server that exposes the ``/documents`` and
            ``/api/search`` endpoints.
        api_key: Value sent in the ``X-API-Key`` header.
            NOTE(review): the default is a credential committed in source;
            consider supplying it from the environment instead.
        test_file: Path of the .docx file to process and upload.

    Returns:
        True when a "bee" search returns a chunk containing the
        classification marker, False when any step fails.
    """
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}

    print('šŸ”§ Fixing Web UI search for bee classification...')

    # Step 1: Delete all existing documents to start fresh
    print('šŸ—‘ļø Step 1: Deleting all existing documents...')
    _delete_processed_documents(base_url, headers)

    # Step 2: Process the document with the updated processor
    print('\nšŸ“„ Step 2: Processing document with updated processor...')
    if not _process_document(test_file):
        return False

    # Step 3: Upload the processed document to LightRAG
    print('\nšŸ“¤ Step 3: Uploading document to LightRAG...')
    if not _upload_document(base_url, api_key, test_file):
        return False

    # Step 4: Wait for indexing before querying
    print('\nā³ Step 4: Waiting for indexing (20 seconds)...')
    time.sleep(20)

    # Step 5: Verify search for bee classification
    print('\nšŸ” Step 5: Verifying bee classification search...')
    return _verify_search(base_url, headers)


if __name__ == '__main__':
    success = fix_webui_search()
    if success:
        print('\nšŸŽ‰ SUCCESS: Bee classification is now searchable in Web UI!')
    else:
        print('\nāš ļø Some issues remain. The classification metadata may not be properly indexed.')