"""Manual smoke test: run the document processor on a sample PDF and print a summary."""

import asyncio
import sys
import os

# Make the bundled LightRAG checkout importable; this path is relative to the
# current working directory, so it must be on sys.path before the import below.
sys.path.insert(0, 'LightRAG-main')

from lightrag.document_processor import get_document_processor


def _print_table_preview(index, table, max_rows=3):
    """Print one table's declared dimensions followed by its leading rows.

    Args:
        index: Position of the table in the result's table list.
        table: Mapping read via ``.get`` for the 'rows', 'columns' and
            'data' keys.
        max_rows: Maximum number of entries from 'data' to print.
    """
    print(f"Table {index}: rows={table.get('rows')}, cols={table.get('columns')}")
    # A missing 'data' key is treated as an empty table.
    leading_rows = table.get('data', [])[:max_rows]
    for row_number, row in enumerate(leading_rows):
        print(f" Row {row_number}: {row}")


async def test_pdf():
    """Process 'test/safedistance.pdf' and print a summary of the result.

    Prints, in order: the success flag, the content length, the number of
    tables, a preview of up to three rows per table, whether the
    'processed_with_ocr' metadata flag is set, and the first 500 characters
    of the extracted content.
    """
    outcome = await get_document_processor().process_document('test/safedistance.pdf')

    print(f"Success: {outcome.success}")
    print(f"Content length: {len(outcome.content)}")

    tables = outcome.tables
    table_count = len(tables) if tables else 0
    print(f"Tables found: {table_count}")

    # A falsy table collection (None or empty) yields no previews.
    for index, table in enumerate(tables or ()):
        _print_table_preview(index, table)

    # Report whether the result's metadata flags the document as OCR-processed.
    ocr_message = (
        "OCR was used (good)"
        if outcome.metadata.get('processed_with_ocr')
        else "OCR was NOT used (maybe text extraction succeeded?)"
    )
    print(ocr_message)

    # Show the leading part of the extracted text for a quick eyeball check.
    print("\n--- First 500 characters of extracted content ---")
    print(outcome.content[:500])


if __name__ == "__main__":
    asyncio.run(test_pdf())