83 lines
3.2 KiB
Python
83 lines
3.2 KiB
Python
|
|
import asyncio
|
|
import sys
|
|
import os
|
|
|
|
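# Put the "LightRAG-main" directory first on sys.path (relative to the current working directory) so the `lightrag` import below can find it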
|
|
sys.path.insert(0, "LightRAG-main")
|
|
|
|
def _report_ocr(images):
    """Print one OCR status line per image; return True if any image has text.

    Each image is expected to be a dict. A missing, ``None`` or non-string
    ``ocr_text`` is treated as "no text" instead of raising, so one malformed
    entry cannot abort the whole report.
    """
    any_text = False
    for index, img in enumerate(images, start=1):
        raw_text = img.get('ocr_text')
        text = raw_text if isinstance(raw_text, str) else ''
        if text.strip():
            any_text = True
            # A stored confidence of None would break the ':.3f' format spec.
            confidence = img.get('ocr_confidence') or 0
            print(f" ✅ Image {index}: {len(text)} chars, confidence: {confidence:.3f}")
            # Only show an ellipsis when the preview really was cut short.
            ellipsis = '...' if len(text) > 50 else ''
            print(f" Text: {text[:50]}{ellipsis}")
        elif 'ocr_error' in img:
            print(f" ❌ Image {index}: {img['ocr_error']}")
        else:
            print(f" ⚠️ Image {index}: No OCR text")
    return any_text


def _report_classification(images):
    """Print the top classification per image; return (any_classified, bee_found).

    ``bee_found`` is True when the top label of at least one image contains
    the substring "bee" (case-insensitive).
    """
    any_classified = False
    bee_found = False
    for index, img in enumerate(images, start=1):
        predictions = img.get('classification')
        if not predictions:
            # Mirror the OCR report: say explicitly that nothing was produced.
            print(f" ⚠️ Image {index}: No classification")
            continue
        any_classified = True
        top_result = predictions[0]
        # Fall back to safe defaults when the stored values are missing/None.
        label = str(top_result.get('label') or 'unknown')
        score = top_result.get('confidence') or 0
        print(f" ✅ Image {index}: {label} (score: {score:.3f})")
        if 'bee' in label.lower():
            bee_found = True
            print(" 🎯 BEE DETECTED!")
    return any_classified, bee_found


async def verify_fix(test_file="test.docx", processor=None):
    """Verify that OCR and OpenCLIP are working independently.

    Prints the availability of the OCR and image-classifier components,
    processes ``test_file`` and reports, per extracted image, the OCR text
    and the top classification, followed by an overall summary.

    Args:
        test_file: Path of the document to process. Defaults to the
            original hard-coded ``"test.docx"``.
        processor: Object exposing ``ocr_processor``, ``image_classifier``
            and an awaitable ``process_document(path)``. When ``None`` it is
            obtained from ``lightrag.document_processor.get_document_processor()``.

    Returns:
        None. All results are written to stdout. Any exception is caught,
        reported together with its traceback, and not re-raised.
    """
    print("🔍 VERIFYING COMPLETE FIX")
    print("=" * 50)

    try:
        if processor is None:
            # Imported lazily so an import failure is reported like any other error.
            from lightrag.document_processor import get_document_processor

            processor = get_document_processor()

        # Guard both components the same way: either attribute may be None.
        ocr_ready = bool(processor.ocr_processor and processor.ocr_processor.ocr_available)
        classifier_ready = bool(processor.image_classifier and processor.image_classifier.available)

        print("🎯 COMPONENT STATUS:")
        print(f" OCR: {'✅ Available' if ocr_ready else '❌ Not Available'}")
        print(f" Image Classifier: {'✅ Available' if classifier_ready else '❌ Not Available'}")

        # Process test document
        if not os.path.exists(test_file):
            print(f"❌ Test file not found: {test_file}")
            return

        print(f"\n📄 PROCESSING: {test_file}")
        result = await processor.process_document(test_file)

        if not result.success:
            print(f"❌ Processing failed: {result.error}")
            return

        print("✅ Processing successful")
        print(f"📊 Metadata: {result.metadata}")

        # Treat a missing image list as "no images" instead of failing.
        images = result.images or []

        # Check OCR results
        print("\n🔤 OCR PERFORMANCE:")
        ocr_success = _report_ocr(images)

        # Check classification
        print("\n🖼️ CLASSIFICATION PERFORMANCE:")
        classification_success, bee_found = _report_classification(images)

        print("\n🎯 FINAL RESULTS:")
        print(f" OCR: {'✅ WORKING' if ocr_success else '❌ FAILED'}")
        print(f" Classification: {'✅ WORKING' if classification_success else '❌ FAILED'}")
        print(f" Bee Detection: {'✅ SUCCESS' if bee_found else '❌ NOT FOUND'}")
        print(f" Dependency Isolation: {'✅ ACHIEVED' if ocr_success and classification_success else '❌ FAILED'}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and keep the traceback.
        print(f"❌ Verification failed: {e}")
        import traceback

        traceback.print_exc()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: create the verification coroutine and let asyncio
    # run it to completion.
    verification = verify_fix()
    asyncio.run(verification)
|