"""Diagnostic script: inspect the content the document processor returns.

Processes ``ocr.pdf`` and prints a summary of the returned content so an
empty or whitespace-only result is easy to spot.
"""
import asyncio
import os
import sys

# Make the bundled LightRAG checkout importable; the path is relative to the
# current working directory, so it must be appended before the import below.
sys.path.append('LightRAG-main')

from lightrag.document_processor import get_document_processor  # noqa: E402


async def test():
    """Process ``ocr.pdf`` and print diagnostics about the resulting content.

    Prints the content length, the first and last 100 characters, whether
    the content is empty once stripped, the stripped length, and the
    result's ``success`` and ``error`` attributes, followed by a final
    verdict line.
    """
    doc_processor = get_document_processor()
    outcome = await doc_processor.process_document('ocr.pdf')

    content = outcome.content
    print('Content length:', len(content))
    print('Content starts with:', repr(content[:100]))
    print('Content ends with:', repr(content[-100:]))

    stripped = content.strip()
    print('Is whitespace only:', not stripped)
    print('Stripped length:', len(stripped))
    print('Success:', outcome.success)
    print('Error:', outcome.error)

    # Final verdict: does anything survive whitespace stripping?
    if stripped:
        print("✅ Content has non-whitespace characters")
        return

    print("⚠️ CONTENT IS EMPTY AFTER STRIPPING!")
    print("Raw content:", repr(content))


if __name__ == "__main__":
    asyncio.run(test())