#!/usr/bin/env python3
"""
Test script to verify document processor functionality
"""

import asyncio
import os
import sys
import traceback

# Make the LightRAG checkout importable. This has to happen before the
# lightrag import below, and the path is resolved against the current
# working directory, so run the script from the directory that contains
# 'LightRAG-main'.
sys.path.append('LightRAG-main')

from lightrag.document_processor import get_document_processor  # noqa: E402


async def test_processor():
    """Test the document processor with test.docx.

    Runs the processor returned by ``get_document_processor()`` on
    ``test.docx`` and prints a report: the success flag, content length,
    image count, each image's primary classification, every content line
    mentioning 'classification' or 'bee', and the result metadata.

    Returns:
        bool: True when processing finished and ``result.success`` is
        truthy; False when the result reports failure or any exception
        was raised (the exception and its traceback are printed rather
        than propagated).
    """
    print("🧪 Testing Document Processor")
    print("=" * 40)

    try:
        processor = get_document_processor()
        result = await processor.process_document('test.docx')

        # Normalise possibly-None fields so the report below cannot trip
        # over them; images was already guarded this way, content was not.
        content = result.content or ""
        images = result.images or []

        print(f"Success: {result.success}")
        print(f"Content length: {len(content)}")
        print(f"Images count: {len(images)}")

        for img in images:
            classification = img.get("primary_classification", "No classification")
            print(f"Image {img.get('index')}: {classification}")

        # Print content snippets that contain 'classification' or 'bee'
        print("\n🔍 Searching for classification content...")
        found_classification = False
        for line in content.split('\n'):
            lowered = line.lower()
            if 'classification' in lowered or 'bee' in lowered:
                print(f"Found: {line}")
                found_classification = True

        if not found_classification:
            print("❌ No classification content found in document")

        # Check metadata
        print(f"\n📊 Metadata: {result.metadata}")

        return bool(result.success)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure in
        # full and signal it through the return value instead of crashing.
        print(f"❌ Error testing processor: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI
    # can detect a failed run.
    sys.exit(0 if asyncio.run(test_processor()) else 1)