railseek6/test_gpu_ocr_fixed.py

#!/usr/bin/env python3
"""
Test script to verify GPU-accelerated OCR with fixed cuDNN DLLs
"""

import os
import sys
import requests
import json
import time
from pathlib import Path

# Seconds to wait on short API calls (login, document listing) before giving
# up, so an unresponsive server fails the test instead of hanging it.
_REQUEST_TIMEOUT = 30
# Seconds to wait on the upload call, which carries the PDF payload.
_UPLOAD_TIMEOUT = 300


def _configure_cuda_environment():
    """Point the process environment at CUDA 11.8 and report cuDNN DLLs.

    Returns:
        True when the CUDA 11.8 install directory exists; the CUDA-related
        environment variables are then set and each expected cuDNN DLL is
        reported as found or missing (a missing DLL is only reported, it
        does not fail this step). False when the directory is absent.
    """
    cuda_path = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
    if not os.path.exists(cuda_path):
        print(f"❌ CUDA 11.8 not found at {cuda_path}")
        return False

    os.environ['CUDA_PATH'] = cuda_path
    os.environ['CUDA_HOME'] = cuda_path
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    os.environ['LIGHTRAG_OCR_ENGINE'] = 'paddleocr'

    # Prepend the CUDA bin directory so its DLLs are resolved first.
    cuda_bin = os.path.join(cuda_path, 'bin')
    os.environ['PATH'] = cuda_bin + os.pathsep + os.environ.get('PATH', '')

    print("✅ CUDA 11.8 environment configured")
    print(f"   CUDA_PATH: {cuda_path}")
    print(f"   CUDA bin directory exists: {os.path.exists(cuda_bin)}")

    cudnn_files = [
        'cudnn64_8.dll',
        'cudnn_ops_infer64_8.dll',
        'cudnn_adv_infer64_8.dll',
    ]
    for dll in cudnn_files:
        dll_path = os.path.join(cuda_bin, dll)
        if os.path.exists(dll_path):
            print(f"✅ {dll} found: {dll_path}")
        else:
            print(f"❌ {dll} NOT found: {dll_path}")
    return True


def _run_direct_ocr_check():
    """Initialise PaddleOCR on the GPU and OCR the first page of ``ocr.pdf``.

    Returns:
        True when PaddleOCR initialises (and, if ``ocr.pdf`` exists in the
        working directory, the OCR call completes). A missing ``ocr.pdf`` is
        reported but does not fail this step. False when any exception is
        raised; the traceback is printed.
    """
    print("\n🔍 Testing PaddleOCR GPU directly...")
    try:
        # Imported lazily so a missing/broken GPU stack is reported as a test
        # failure rather than an import error at module load.
        from paddleocr import PaddleOCR
        import paddle

        print(f"Paddle version: {paddle.__version__}")
        print(f"Paddle is compiled with CUDA: {paddle.is_compiled_with_cuda()}")
        print(f"CUDA available: {paddle.device.is_compiled_with_cuda()}")
        print(f"GPU devices: {paddle.device.cuda.device_count()}")

        ocr = PaddleOCR(use_gpu=True, lang='en')
        print("✅ PaddleOCR GPU initialization successful")

        test_pdf = "ocr.pdf"
        if not os.path.exists(test_pdf):
            print(f"❌ Test PDF not found: {test_pdf}")
            return True

        print(f"📄 Testing with {test_pdf}")
        import fitz  # PyMuPDF

        img_path = "test_page.png"
        try:
            # Render the first page to a PNG that PaddleOCR can read.
            doc = fitz.open(test_pdf)
            try:
                doc[0].get_pixmap().save(img_path)
            finally:
                doc.close()

            print("🔄 Performing OCR on PDF page...")
            start_time = time.perf_counter()
            result = ocr.ocr(img_path, cls=False)
            ocr_time = time.perf_counter() - start_time
            print(f"✅ OCR completed in {ocr_time:.2f} seconds")

            if result and result[0]:
                # Each entry is indexed as entry[1][0] (text) and
                # entry[1][1] (confidence).
                text_lines = [
                    f"{line[1][0]} (conf: {line[1][1]:.2f})"
                    for line in result[0]
                ]
                print(f"📝 Extracted {len(text_lines)} text boxes:")
                for i, line in enumerate(text_lines[:5]):  # Show first 5
                    print(f"   {i+1}. {line}")
                if len(text_lines) > 5:
                    print(f"   ... and {len(text_lines) - 5} more lines")
        finally:
            # Remove the temporary image even when rendering or OCR raised.
            if os.path.exists(img_path):
                os.remove(img_path)

    except Exception as e:
        print(f"❌ PaddleOCR GPU test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    return True


def _run_server_upload_check():
    """Log in to the local server, upload ``ocr.pdf`` and list documents.

    Returns:
        True when a login yields a token and the upload answers HTTP 200
        (the follow-up document listing is informational only). False when
        no login yields a token, the upload is rejected, or any exception
        is raised; the traceback is printed in the latter case.
    """
    print("\n🌐 Testing server upload with GPU OCR...")
    try:
        # First login to get JWT token - try common credentials
        login_url = "http://localhost:3015/login"
        credentials_to_try = [
            {"username": "admin", "password": "admin"},
            {"username": "admin", "password": "password"},
            {"username": "user", "password": "user"},
            {"username": "lightrag", "password": "lightrag"},
        ]

        token = None
        for creds in credentials_to_try:
            response = requests.post(
                login_url, data=creds, timeout=_REQUEST_TIMEOUT
            )
            if response.status_code == 200:
                token = response.json().get('access_token')
                if token:
                    print(f"✅ Login successful with {creds['username']}/{creds['password']}")
                    break
                # A 200 without a token is not usable; keep trying.
                print(f"❌ Login with {creds['username']}/{creds['password']} returned no access_token")
            else:
                print(f"❌ Login failed with {creds['username']}/{creds['password']}: {response.status_code}")

        if not token:
            print("❌ All login attempts failed. Please check server authentication configuration.")
            return False

        upload_url = "http://localhost:3015/documents/upload"
        # Do not set Content-Type here: when ``files=`` is used, requests
        # generates "multipart/form-data; boundary=..." itself, and a
        # hand-written header would drop the boundary and break parsing.
        headers = {"Authorization": f"Bearer {token}"}

        print("📤 Uploading ocr.pdf to server...")
        with open('ocr.pdf', 'rb') as pdf_file:
            files = {'file': ('ocr.pdf', pdf_file, 'application/pdf')}
            upload_response = requests.post(
                upload_url,
                files=files,
                headers=headers,
                timeout=_UPLOAD_TIMEOUT,
            )

        if upload_response.status_code != 200:
            print(f"❌ Upload failed: {upload_response.status_code} - {upload_response.text}")
            return False

        result = upload_response.json()
        print("✅ Upload successful")
        print(f"📊 Upload result: {json.dumps(result, indent=2)}")

        # Wait a bit for processing
        print("⏳ Waiting for OCR processing...")
        time.sleep(5)

        # Check document status
        docs_url = "http://localhost:3015/documents"
        docs_response = requests.get(
            docs_url, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        if docs_response.status_code == 200:
            docs = docs_response.json()
            print(f"📋 Documents in system: {len(docs)}")
            for doc in docs:
                if isinstance(doc, dict):
                    print(f"   - {doc.get('filename', 'Unknown')}: {doc.get('status', 'Unknown')}")
                else:
                    # The response shape is not verified here; print other
                    # entries verbatim instead of failing on ``.get``.
                    print(f"   - {doc}")

        return True

    except Exception as e:
        print(f"❌ Server test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_gpu_ocr_with_server():
    """Test OCR PDF upload with GPU acceleration.

    Runs three stages in order and stops at the first failure:
    CUDA 11.8 environment setup, a direct PaddleOCR GPU run against
    ``ocr.pdf``, and a login + upload of ``ocr.pdf`` to the server at
    ``http://localhost:3015``.

    Returns:
        True when every stage succeeds, False otherwise.
    """
    print("🧪 Testing GPU-accelerated OCR with fixed cuDNN DLLs")
    print("=" * 60)

    return (
        _configure_cuda_environment()
        and _run_direct_ocr_check()
        and _run_server_upload_check()
    )

if __name__ == "__main__":
    # Script entry point: run the end-to-end check, print a summary of the
    # outcome, and exit with status 0 on success or 1 on failure.
    banner_rule = "=" * 60
    print("🚀 Starting GPU OCR Test with Fixed cuDNN DLLs")
    print(banner_rule)

    passed = test_gpu_ocr_with_server()

    print("\n" + banner_rule)
    if passed:
        summary_lines = (
            "🎉 GPU OCR TEST COMPLETED SUCCESSFULLY!",
            "   - cuDNN DLLs are properly named and accessible",
            "   - PaddleOCR GPU acceleration is working",
            "   - Server upload and processing completed",
        )
        exit_code = 0
    else:
        summary_lines = (
            "❌ GPU OCR TEST FAILED",
            "   Check the errors above for troubleshooting",
        )
        exit_code = 1

    for summary_line in summary_lines:
        print(summary_line)

    sys.exit(exit_code)