Files
railseek6/test_document_processor.py

51 lines
1.7 KiB
Python

#!/usr/bin/env python3
"""
Test script to verify document processor functionality
"""
import sys
import os
sys.path.append('LightRAG-main')
from lightrag.document_processor import get_document_processor
import asyncio
async def test_processor():
    """Run the document processor on ``test.docx`` and print a report.

    The report covers, in order: the result's ``success`` flag, the length
    of its ``content``, the number of ``images``, the
    ``primary_classification`` of each image (when images are present),
    every content line that mentions "classification" or "bee"
    (case-insensitive), and finally the result's ``metadata``.

    Any exception raised along the way is caught, printed together with its
    traceback, and not re-raised.
    """
    print("🧪 Testing Document Processor")
    print("=" * 40)
    try:
        doc_processor = get_document_processor()
        outcome = await doc_processor.process_document('test.docx')

        print(f"Success: {outcome.success}")
        print(f"Content length: {len(outcome.content)}")

        # An empty or missing image collection is reported as zero.
        if outcome.images:
            image_total = len(outcome.images)
        else:
            image_total = 0
        print(f"Images count: {image_total}")

        if outcome.images:
            for image in outcome.images:
                label = image.get("primary_classification", "No classification")
                print(f"Image {image.get('index')}: {label}")

        # Echo every content line that mentions one of the keywords.
        print("\n🔍 Searching for classification content...")
        keywords = ('classification', 'bee')
        match_count = 0
        for text_line in outcome.content.split('\n'):
            lowered = text_line.lower()
            if any(keyword in lowered for keyword in keywords):
                print(f"Found: {text_line}")
                match_count += 1
        if match_count == 0:
            print("❌ No classification content found in document")

        # Dump the metadata last.
        print(f"\n📊 Metadata: {outcome.metadata}")
    except Exception as error:
        # Top-level boundary of the script: report the failure, don't crash.
        print(f"❌ Error testing processor: {error}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: drive the async test to completion.
    entry_coroutine = test_processor()
    asyncio.run(entry_coroutine)