# Files
# railseek6/check_full_content.py
#
# 51 lines
# 2.4 KiB
# Python

import requests
def check_full_document_content(base_url='http://localhost:3015', api_key='jleu1212', timeout=20):
    """Print every document returned by the API with a per-chunk content report.

    Requests ``{base_url}/api/documents`` and, for each returned document that
    has a truthy ``id``, requests ``{base_url}/api/documents/{id}/chunks``.
    For every chunk the content length is printed. Chunks whose content
    contains the marker ``'Image Classifications:'`` have their matching lines
    printed; all other chunks get a preview of at most 200 characters.

    Args:
        base_url: Root URL of the service; a trailing slash is tolerated.
        api_key: Value sent in the ``X-API-Key`` request header.
        timeout: Timeout in seconds passed to each ``requests.get`` call.

    Returns:
        None. Results and errors are reported with ``print``; the function
        does not propagate exceptions.
    """
    # NOTE(review): the default API key is committed in source. Consider
    # supplying it from the caller or the environment instead.
    root = base_url.rstrip('/')
    headers = {'X-API-Key': api_key, 'Content-Type': 'application/json'}

    # Stage 1: list the documents. A failure here leaves nothing to report.
    try:
        response = requests.get(f"{root}/api/documents", headers=headers, timeout=timeout)
        print(f'Documents API Status: {response.status_code}')
        if response.status_code != 200:
            print(f'Error: {response.text}')
            return
        documents = response.json()
        print(f'Total documents: {len(documents)}')
    except Exception as e:  # deliberate best-effort boundary of a diagnostic script
        print(f'Exception: {e}')
        return

    # Stage 2: report each document independently, so a timeout or a malformed
    # payload for one document does not hide the remaining documents.
    for doc in documents:
        try:
            _report_document(root, headers, timeout, doc)
        except Exception as e:
            print(f'Exception: {e}')


def _report_document(base_url, headers, timeout, doc):
    """Print one document's metadata and, if it has an id, its chunk report."""
    print(f"\n📄 Document: {doc.get('file_path')}")
    print(f" ID: {doc.get('id')}")
    print(f" Status: {doc.get('status')}")

    doc_id = doc.get('id')
    if not doc_id:
        # Without an id there is no chunk endpoint to query.
        return

    chunk_response = requests.get(
        f"{base_url}/api/documents/{doc_id}/chunks", headers=headers, timeout=timeout
    )
    if chunk_response.status_code != 200:
        print(f" Error getting chunks: {chunk_response.status_code}")
        return

    chunks = chunk_response.json()
    print(f" Chunks: {len(chunks)}")
    for number, chunk in enumerate(chunks, start=1):
        # ``or ''`` also covers a present-but-null content value, which
        # ``chunk.get('content', '')`` would pass through as None.
        _report_chunk(number, chunk.get('content') or '')


def _report_chunk(number, content):
    """Print the size of one chunk plus its classification lines or a preview."""
    print(f" Chunk {number}: {len(content)} chars")

    if 'Image Classifications:' in content:
        print(" ✅ CONTAINS CLASSIFICATION DATA:")
        for line in content.split('\n'):
            if 'Image Classifications:' in line or 'bee' in line.lower():
                print(f" {line}")
        return

    # Truncate long chunks so the report stays readable.
    preview = (content[:200] + "...") if len(content) > 200 else content
    print(f" Preview: {preview}")
# Script entry point: run the document/chunk report only when this file is
# executed directly, not when it is imported as a module.
if "__main__" == __name__:
    check_full_document_content()