82 lines
2.8 KiB
Python
82 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Check document chunks to see what's actually stored in the vector database
|
|
"""
|
|
|
|
import json
import os

import requests
|
|
|
|
# Connection settings for the LightRAG server queried by this script.
# Both values can be overridden through the environment so the endpoint and
# credential do not have to be edited (or committed) in source; the literals
# below are only fallbacks that preserve the script's previous behaviour.
LIGHTRAG_URL = os.environ.get('LIGHTRAG_URL', 'http://localhost:3015')
API_KEY = os.environ.get('LIGHTRAG_API_KEY', 'jleu1212')
|
|
|
|
def check_document_chunks():
    """Print the chunks stored for the first processed document.

    Requests ``{LIGHTRAG_URL}/documents``, takes the first entry listed under
    ``statuses["processed"]`` in the JSON reply, then requests
    ``{LIGHTRAG_URL}/documents/{doc_id}/chunks`` and prints the decoded body.

    Returns:
        None. All results and failures are reported on stdout; exceptions
        raised while requesting or parsing are caught and printed.
    """
    print("📄 Checking Document Chunks")
    print("=" * 40)

    headers = {'X-API-Key': API_KEY}
    # requests has no default timeout; without one an unresponsive server
    # would block this script indefinitely.
    timeout = 10

    try:
        # First, get the document ID
        response = requests.get(
            f"{LIGHTRAG_URL}/documents", headers=headers, timeout=timeout
        )
        if response.status_code != 200:
            # Report the failure instead of exiting without any output.
            print(f"❌ Failed to list documents: {response.status_code}")
            print(f"Response: {response.text}")
            return

        statuses = response.json().get('statuses', {})
        processed_docs = statuses.get('processed', [])
        if not processed_docs:
            print("❌ No processed documents found")
            return

        doc_id = processed_docs[0].get('id')
        print(f"📋 Document ID: {doc_id}")
        if doc_id in (None, ''):
            # Avoid querying a meaningless ".../documents/None/chunks" URL.
            print("❌ Processed document has no 'id' field")
            return

        # Try to get chunks for this document
        print(f"\n🔍 Getting chunks for document {doc_id}...")
        chunks_response = requests.get(
            f"{LIGHTRAG_URL}/documents/{doc_id}/chunks",
            headers=headers,
            timeout=timeout,
        )
        if chunks_response.status_code == 200:
            print(f"Chunks response: {chunks_response.json()}")
        else:
            print(f"❌ Failed to get chunks: {chunks_response.status_code}")
            print(f"Response: {chunks_response.text}")

    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic, so connection,
        # timeout and malformed-payload errors are all reported, not raised.
        print(f"❌ Error: {e}")
|
|
|
|
def test_direct_chunk_search():
    """Post one 'local'-mode query to the search endpoint and print the hits.

    Sends a fixed query to ``{LIGHTRAG_URL}/search`` and, on HTTP 200, prints
    the number of entries under ``results`` followed by each entry's
    ``content`` and ``metadata``. Any other status code, or any exception
    raised while requesting or parsing, is printed instead.
    """
    print("\n🔍 Testing Direct Chunk Search")
    print("=" * 40)

    request_headers = {'Content-Type': 'application/json', 'X-API-Key': API_KEY}

    # A more specific query that might trigger chunk retrieval.
    search_request = {
        'query': 'bee classification image photo',
        'top_k': 10,
        'mode': 'local',
        'include_metadata': True,
    }

    try:
        reply = requests.post(
            f'{LIGHTRAG_URL}/search',
            json=search_request,
            headers=request_headers,
            timeout=10,
        )
        if reply.status_code != 200:
            print(f"❌ Search failed: {reply.status_code}")
            return

        hits = reply.json().get('results', [])
        print(f"Found {len(hits)} results")

        # Number the hits from 1 for display.
        for position, hit in enumerate(hits, start=1):
            body = hit.get('content', '')
            meta = hit.get('metadata', {})
            print(f"\n--- Result {position} ---")
            print(f"Content: {body}")
            print(f"Metadata: {meta}")

    except Exception as err:
        print(f"❌ Error: {err}")
|
|
|
|
if __name__ == "__main__":
    # Run the diagnostics in order: document chunk listing, then direct search.
    for diagnostic in (check_document_chunks, test_direct_chunk_search):
        diagnostic()