Files
railseek6/check_document_content.py

16 lines
655 B
Python

from optimized_document_processor import OptimizedDocumentProcessor
import asyncio
async def check_document_content(document_path='test.docx'):
    """Process a document and print its text and image classifications.

    Runs ``OptimizedDocumentProcessor().process_document`` on
    *document_path*, prints the ``'text_content'`` entry of the result, then
    prints the label and confidence of the first classification entry of
    every image in ``result['images']`` that has a non-empty
    ``'classification'``.

    Args:
        document_path: Document to process; passed unchanged to the
            processor. Defaults to ``'test.docx'`` so that existing
            no-argument calls keep inspecting the same file as before.
    """
    processor = OptimizedDocumentProcessor()
    result = await processor.process_document(document_path)

    print('=== FULL TEXT CONTENT ===')
    print(result['text_content'])

    print('\n=== IMAGE CLASSIFICATIONS ===')
    # Number images from 1 so the printed index matches human counting.
    for number, img in enumerate(result['images'], start=1):
        # Skip images that were not classified or whose result is empty.
        if 'classification' not in img or not img['classification']:
            continue
        # Only the first entry is reported.
        # NOTE(review): presumably the highest-confidence match — confirm
        # against OptimizedDocumentProcessor's output ordering.
        top = img['classification'][0]
        print(f'Image {number}: {top["label"]} (confidence: {top["confidence"]:.3f})')
if __name__ == '__main__':
    # Script entry point: build the coroutine, then drive it to completion
    # on a fresh event loop.
    content_check = check_document_content()
    asyncio.run(content_check)