# File: railseek6/verify_fix.py
#
# 83 lines
# 3.2 KiB
# Python

import asyncio
import sys
import os
# Make the LightRAG-main checkout importable (relative path: resolved against the current working directory)
sys.path.insert(0, "LightRAG-main")
def _preview(text, limit=50):
    """Return *text* cut to *limit* characters; '...' is appended only when truncated."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def _report_ocr(images):
    """Print one OCR status line per image and return True if any image has OCR text.

    Each image is expected to be a dict-like object. The keys read here are
    ``ocr_text``, ``ocr_confidence`` and ``ocr_error``; all are optional.
    """
    any_text = False
    for index, img in enumerate(images, start=1):
        # ``ocr_text`` may be present but None; treat that like a missing key
        # instead of letting ``None.strip()`` abort the whole report.
        raw_text = img.get('ocr_text') or ''
        if raw_text.strip():
            any_text = True
            # A stored None confidence would break the ``:.3f`` format below.
            confidence = img.get('ocr_confidence') or 0
            print(f" ✅ Image {index}: {len(raw_text)} chars, confidence: {confidence:.3f}")
            print(f" Text: {_preview(raw_text)}")
        elif 'ocr_error' in img:
            print(f" ❌ Image {index}: {img['ocr_error']}")
        else:
            print(f" ⚠️ Image {index}: No OCR text")
    return any_text


def _report_classification(images):
    """Print the top classification per image and return ``(any_classified, bee_found)``.

    Only the first entry of each image's ``classification`` list is reported;
    images without a non-empty ``classification`` value are skipped silently.
    """
    any_classified = False
    bee_found = False
    for index, img in enumerate(images, start=1):
        predictions = img.get('classification')
        if not predictions:
            continue
        any_classified = True
        top_result = predictions[0]
        # Fall back to defaults when a key is missing or explicitly None.
        label = top_result.get('label') or 'unknown'
        score = top_result.get('confidence') or 0
        print(f" ✅ Image {index}: {label} (score: {score:.3f})")
        # Substring match: any label containing "bee" counts (e.g. "beetle").
        if 'bee' in label.lower():
            bee_found = True
            print(" 🎯 BEE DETECTED!")
    return any_classified, bee_found


async def verify_fix() -> None:
    """Verify that OCR and OpenCLIP are working independently.

    Obtains the document processor from ``lightrag.document_processor``,
    prints whether its OCR and image-classifier components report themselves
    available, processes ``test.docx`` from the current working directory and
    prints a per-image OCR and classification report followed by a summary.

    Returns:
        None. All results are reported on stdout. The function returns early
        (after printing a message) when the test file is missing or when
        processing reports failure.

    Any exception raised along the way is caught, reported with a traceback,
    and not re-raised.
    """
    print("🔍 VERIFYING COMPLETE FIX")
    print("=" * 50)
    try:
        # Imported lazily so an import failure is reported by the handler below.
        from lightrag.document_processor import get_document_processor

        processor = get_document_processor()
        # Guard both components the same way: either may be unset.
        ocr_ready = bool(processor.ocr_processor and processor.ocr_processor.ocr_available)
        classifier_ready = bool(processor.image_classifier and processor.image_classifier.available)
        print("🎯 COMPONENT STATUS:")
        print(f" OCR: {'✅ Available' if ocr_ready else '❌ Not Available'}")
        print(f" Image Classifier: {'✅ Available' if classifier_ready else '❌ Not Available'}")

        # Process test document
        test_file = "test.docx"
        if not os.path.exists(test_file):
            print(f"❌ Test file not found: {test_file}")
            return
        print(f"\n📄 PROCESSING: {test_file}")
        result = await processor.process_document(test_file)
        if not result.success:
            print(f"❌ Processing failed: {result.error}")
            return
        print("✅ Processing successful")
        print(f"📊 Metadata: {result.metadata}")

        # A document without images may yield None rather than an empty list.
        images = result.images or []

        # Check OCR results
        print("\n🔤 OCR PERFORMANCE:")
        ocr_success = _report_ocr(images)

        # Check classification
        print("\n🖼️ CLASSIFICATION PERFORMANCE:")
        classification_success, bee_found = _report_classification(images)

        print("\n🎯 FINAL RESULTS:")
        print(f" OCR: {'✅ WORKING' if ocr_success else '❌ FAILED'}")
        print(f" Classification: {'✅ WORKING' if classification_success else '❌ FAILED'}")
        print(f" Bee Detection: {'✅ SUCCESS' if bee_found else '❌ NOT FOUND'}")
        print(f" Dependency Isolation: {'✅ ACHIEVED' if ocr_success and classification_success else '❌ FAILED'}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything, never raise.
        print(f"❌ Verification failed: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: build the verification coroutine, then let
    # asyncio.run() drive it to completion.
    _verification = verify_fix()
    asyncio.run(_verification)