#!/usr/bin/env python3
"""
Check document chunks to see what's actually stored in the vector database
"""

import json
import os

import requests

# Endpoint and credentials can be overridden through the environment so the
# key does not have to live in source; the defaults keep the original values.
LIGHTRAG_URL = os.environ.get('LIGHTRAG_URL', 'http://localhost:3015')
API_KEY = os.environ.get('LIGHTRAG_API_KEY', 'jleu1212')

# Upper bound (seconds) applied to every HTTP call so an unresponsive server
# cannot hang the script.
REQUEST_TIMEOUT = 10


def check_document_chunks():
    """Print the chunks reported for the first processed document.

    Requests ``{LIGHTRAG_URL}/documents``, reads the first entry of
    ``statuses.processed`` in the JSON reply, then requests
    ``{LIGHTRAG_URL}/documents/<id>/chunks`` and prints the decoded response.

    Every outcome (success, HTTP failure, missing data, exception) is
    reported on stdout; nothing is returned and no exception propagates.
    """
    print("📄 Checking Document Chunks")
    print("=" * 40)

    headers = {'X-API-Key': API_KEY}

    try:
        # First, get the document ID
        response = requests.get(
            f"{LIGHTRAG_URL}/documents",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            # Report the failure instead of ending silently.
            print(f"❌ Failed to list documents: {response.status_code}")
            print(f"Response: {response.text}")
            return

        statuses = response.json().get('statuses', {})
        processed_docs = statuses.get('processed', [])
        if not processed_docs:
            print("❌ No processed documents found")
            return

        doc_id = processed_docs[0].get('id')
        print(f"📋 Document ID: {doc_id}")
        if not doc_id:
            # Without an ID the chunks URL would be built from "None".
            print("❌ First processed document has no 'id' field")
            return

        # Try to get chunks for this document
        print(f"\n🔍 Getting chunks for document {doc_id}...")
        chunks_response = requests.get(
            f"{LIGHTRAG_URL}/documents/{doc_id}/chunks",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if chunks_response.status_code == 200:
            chunks_data = chunks_response.json()
            print(f"Chunks response: {chunks_data}")
        else:
            print(f"❌ Failed to get chunks: {chunks_response.status_code}")
            print(f"Response: {chunks_response.text}")

    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic, so network
        # errors and malformed replies are reported rather than raised.
        print(f"❌ Error: {e}")


def test_direct_chunk_search():
    """Run a fixed search query and print each returned result.

    POSTs a hard-coded payload to ``{LIGHTRAG_URL}/search`` and, on a 200
    reply, prints the ``content`` and ``metadata`` of every entry in the
    response's ``results`` list. Failures are reported on stdout; nothing is
    returned and no exception propagates.
    """
    print("\n🔍 Testing Direct Chunk Search")
    print("=" * 40)

    headers = {'Content-Type': 'application/json', 'X-API-Key': API_KEY}

    # Try a more specific search that might trigger chunk retrieval
    payload = {
        'query': 'bee classification image photo',
        'top_k': 10,
        'mode': 'local',
        'include_metadata': True,
    }

    try:
        response = requests.post(
            f'{LIGHTRAG_URL}/search',
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"❌ Search failed: {response.status_code}")
            return

        results = response.json().get('results', [])
        print(f"Found {len(results)} results")

        for index, result in enumerate(results, start=1):
            content = result.get('content', '')
            metadata = result.get('metadata', {})
            print(f"\n--- Result {index} ---")
            print(f"Content: {content}")
            print(f"Metadata: {metadata}")

    except Exception as e:
        # Deliberately broad: report any failure instead of crashing.
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    check_document_chunks()
    test_direct_chunk_search()