Files
railseek6/check_chunks.py

82 lines
2.8 KiB
Python

#!/usr/bin/env python3
"""
Check document chunks to see what's actually stored in the vector database
"""
import json
import os

import requests
# Base URL that the endpoint paths used in this script are appended to.
# Override with the LIGHTRAG_URL environment variable; an unset or empty
# variable falls back to the local default.
LIGHTRAG_URL = os.environ.get('LIGHTRAG_URL') or 'http://localhost:3015'
# Key sent in the X-API-Key request header. Prefer supplying it through the
# LIGHTRAG_API_KEY environment variable; the literal fallback only keeps the
# script working as before and should not hold a real secret in source control.
API_KEY = os.environ.get('LIGHTRAG_API_KEY') or 'jleu1212'
def check_document_chunks(timeout=10):
    """Print the chunks stored for the first processed document.

    Requests ``{LIGHTRAG_URL}/documents``, takes the first entry listed under
    ``statuses -> processed`` and then requests
    ``{LIGHTRAG_URL}/documents/<id>/chunks`` for it. The outcome of every step
    is printed to stdout.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up, so an
            unresponsive server cannot hang the script. Defaults to 10.

    Returns:
        None. Failures are reported on stdout; no exception propagates.
    """
    print("📄 Checking Document Chunks")
    print("=" * 40)
    headers = {'X-API-Key': API_KEY}

    try:
        # First, get the document ID
        response = requests.get(
            f"{LIGHTRAG_URL}/documents", headers=headers, timeout=timeout
        )
        if response.status_code != 200:
            # Report the failure instead of finishing without any output.
            print(f"❌ Failed to list documents: {response.status_code}")
            print(f"Response: {response.text}")
            return

        doc_data = response.json()
        # `or` also covers keys that are present but null in the payload.
        statuses = doc_data.get('statuses') or {}
        processed_docs = statuses.get('processed') or []
        if not processed_docs:
            print("❌ No processed documents found")
            return

        doc_id = processed_docs[0].get('id')
        print(f"📋 Document ID: {doc_id}")
        if doc_id is None:
            # Without an ID the chunk URL would be ".../documents/None/chunks".
            print("❌ First processed document has no 'id' field")
            return

        # Try to get chunks for this document
        print(f"\n🔍 Getting chunks for document {doc_id}...")
        chunks_response = requests.get(
            f"{LIGHTRAG_URL}/documents/{doc_id}/chunks",
            headers=headers,
            timeout=timeout,
        )
        if chunks_response.status_code == 200:
            chunks_data = chunks_response.json()
            print(f"Chunks response: {chunks_data}")
        else:
            print(f"❌ Failed to get chunks: {chunks_response.status_code}")
            print(f"Response: {chunks_response.text}")
    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic, so network
        # errors, timeouts and malformed payloads are all reported, not raised.
        print(f"❌ Error: {e}")
def test_direct_chunk_search():
    """Post a sample query to the search endpoint and print every result.

    Sends a fixed query to ``{LIGHTRAG_URL}/search`` and prints the number of
    results followed by the content and metadata of each one. A non-200 reply
    or any raised exception is reported on stdout; nothing is returned.
    """
    print("\n🔍 Testing Direct Chunk Search")
    print("=" * 40)

    # A fairly specific query, chosen in the hope of triggering chunk retrieval
    search_request = dict(
        query='bee classification image photo',
        top_k=10,
        mode='local',
        include_metadata=True,
    )
    request_headers = {'Content-Type': 'application/json', 'X-API-Key': API_KEY}

    try:
        reply = requests.post(
            f'{LIGHTRAG_URL}/search',
            json=search_request,
            headers=request_headers,
            timeout=10,
        )
        if reply.status_code != 200:
            print(f"❌ Search failed: {reply.status_code}")
            return

        hits = reply.json().get('results', [])
        print(f"Found {len(hits)} results")
        for position, hit in enumerate(hits, start=1):
            # Look both fields up before printing anything for this result.
            body, meta = hit.get('content', ''), hit.get('metadata', {})
            print(f"\n--- Result {position} ---")
            print(f"Content: {body}")
            print(f"Metadata: {meta}")
    except Exception as err:
        print(f"❌ Error: {err}")
def main():
    """Run the document-chunk check, then the direct search check."""
    check_document_chunks()
    test_direct_chunk_search()


if __name__ == "__main__":
    main()