"""Check that OCR and image classification both work on a test document.

Runs ``test.docx`` through the document processor obtained from
``lightrag.document_processor`` and prints a per-image report of OCR and
classification results, followed by an overall summary.
"""

import asyncio
import os
import sys
import traceback

# Make the ``LightRAG-main`` directory (resolved against the current working
# directory) importable before ``lightrag`` is imported inside verify_fix().
sys.path.insert(0, "LightRAG-main")


def _report_ocr(images):
    """Print one OCR status line per image.

    Args:
        images: Iterable of per-image mappings. The keys read are
            ``ocr_text``, ``ocr_confidence`` and ``ocr_error``.

    Returns:
        True if at least one image carries non-blank OCR text.
    """
    found_text = False
    for number, img in enumerate(images, start=1):
        # A key that is present but None must not crash the report.
        text = img.get("ocr_text") or ""
        if text.strip():
            found_text = True
            confidence = img.get("ocr_confidence") or 0
            print(f" ✅ Image {number}: {len(text)} chars, confidence: {confidence:.3f}")
            print(f" Text: {text[:50]}...")
        elif "ocr_error" in img:
            print(f" ❌ Image {number}: {img['ocr_error']}")
        else:
            print(f" ⚠️ Image {number}: No OCR text")
    return found_text


def _report_classification(images):
    """Print the top classification of every classified image.

    Args:
        images: Iterable of per-image mappings. ``classification`` is read as
            a sequence of mappings with ``label`` and ``confidence`` keys;
            only its first entry is reported.

    Returns:
        A ``(classified, bee_found)`` tuple: whether any image had a
        non-empty classification, and whether any top label contains "bee"
        (case-insensitive).
    """
    classified = False
    bee_found = False
    for number, img in enumerate(images, start=1):
        predictions = img.get("classification")
        if not predictions:
            continue
        classified = True
        top_result = predictions[0]
        # Fall back to defaults when a key is missing *or* set to None.
        label = top_result.get("label") or "unknown"
        score = top_result.get("confidence") or 0
        print(f" ✅ Image {number}: {label} (score: {score:.3f})")
        if "bee" in str(label).lower():
            bee_found = True
            print(" 🎯 BEE DETECTED!")
    return classified, bee_found


async def verify_fix():
    """Verify that OCR and the image classifier are working independently.

    Prints component availability, processes ``test.docx``, reports OCR and
    classification results per image, and prints a final summary.

    Returns:
        True when both OCR and classification produced a result for at least
        one image. False otherwise, including when the test file is missing,
        processing reports failure, or any exception is raised (the exception
        is printed with its traceback rather than propagated).
    """
    print("🔍 VERIFYING COMPLETE FIX")
    print("=" * 50)

    try:
        # Imported here so that a missing or broken install is reported by the
        # handler below instead of aborting at module import time.
        from lightrag.document_processor import get_document_processor

        processor = get_document_processor()

        print("🎯 COMPONENT STATUS:")
        ocr_ready = processor.ocr_processor.ocr_available
        classifier = processor.image_classifier
        classifier_ready = classifier and classifier.available
        print(f" OCR: {'✅ Available' if ocr_ready else '❌ Not Available'}")
        print(f" Image Classifier: {'✅ Available' if classifier_ready else '❌ Not Available'}")

        # Process test document
        test_file = "test.docx"
        if not os.path.exists(test_file):
            print(f"❌ Test file not found: {test_file}")
            return False

        print(f"\n📄 PROCESSING: {test_file}")
        result = await processor.process_document(test_file)

        if not result.success:
            print(f"❌ Processing failed: {result.error}")
            return False

        print("✅ Processing successful")
        print(f"📊 Metadata: {result.metadata}")

        # Treat a missing image list as "no images" rather than crashing.
        images = result.images or []

        # Check OCR results
        print("\n🔤 OCR PERFORMANCE:")
        ocr_success = _report_ocr(images)

        # Check classification
        print("\n🖼️ CLASSIFICATION PERFORMANCE:")
        classification_success, bee_found = _report_classification(images)

        both_working = ocr_success and classification_success
        print("\n🎯 FINAL RESULTS:")
        print(f" OCR: {'✅ WORKING' if ocr_success else '❌ FAILED'}")
        print(f" Classification: {'✅ WORKING' if classification_success else '❌ FAILED'}")
        print(f" Bee Detection: {'✅ SUCCESS' if bee_found else '❌ NOT FOUND'}")
        print(f" Dependency Isolation: {'✅ ACHIEVED' if both_working else '❌ FAILED'}")
        return both_working

    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything.
        print(f"❌ Verification failed: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Propagate the outcome through the exit status so callers can detect it.
    sys.exit(0 if asyncio.run(verify_fix()) else 1)