Files
railseek6/fix_ocr_processing.py

232 lines
8.4 KiB
Python

"""
Fix OCR Processing with GPU Mode
Ensures GPU-accelerated OCR works for scanned PDF tables
"""
import os
import sys
import requests
import time
import json
from pathlib import Path
# Configure environment for GPU acceleration.
# These assignments run at import time and deliberately overwrite any
# existing values so the CUDA 11.8 toolkit and the first GPU are selected.
os.environ['CUDA_PATH'] = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8'
os.environ['CUDA_HOME'] = os.environ['CUDA_PATH']
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Prepend the CUDA bin directory to PATH. os.pathsep is used instead of a
# hard-coded ';' so the first existing PATH entry is not fused with the CUDA
# entry on non-Windows hosts, and .get() avoids a KeyError when PATH is unset.
os.environ['PATH'] = os.pathsep.join(
    part
    for part in (os.environ['CUDA_PATH'] + '\\bin', os.environ.get('PATH', ''))
    if part
)

# Server configuration.
# Each value can be overridden through an environment variable so the target
# server and credentials do not have to be edited in (or committed to) this
# file; the defaults are the values the script has always used.
BASE_URL = os.environ.get('OCR_TEST_BASE_URL', 'http://localhost:3015')
AUTH_CREDENTIALS = {
    'username': os.environ.get('OCR_TEST_USERNAME', 'jleu3482'),
    'password': os.environ.get('OCR_TEST_PASSWORD', 'jleu1212'),
}
# Path of the scanned PDF used by the OCR and upload tests.
OCR_PDF_PATH = os.environ.get('OCR_TEST_PDF_PATH', 'ocr.pdf')
def test_gpu_ocr_directly():
    """Run PaddleOCR in GPU mode over every page of ``OCR_PDF_PATH``.

    Each page is rendered to a temporary PNG in the current working
    directory, passed to the OCR engine, and the first five recognised text
    boxes are printed together with the OCR time.

    Returns:
        bool: True when all pages were processed without an exception (a
        page with no detected text is reported but does not fail the run);
        False when the PDF does not exist or any step raises, including a
        failed import of paddle, paddleocr or PyMuPDF.
    """
    print("🧪 Testing GPU OCR directly...")
    try:
        # Imported inside the try block so that a missing or broken GPU
        # stack is reported as a failed test instead of an import crash.
        import paddle
        from paddleocr import PaddleOCR
        import fitz  # PyMuPDF

        print(f"✅ PaddlePaddle version: {paddle.__version__}")
        print(f"✅ CUDA available: {paddle.is_compiled_with_cuda()}")
        print(f"✅ GPU devices: {paddle.device.cuda.device_count()}")

        # Initialize PaddleOCR with GPU
        print("🔄 Initializing PaddleOCR with GPU...")
        ocr_engine = PaddleOCR(use_gpu=True, lang='en', show_log=False)
        print("✅ PaddleOCR GPU initialization successful")

        # Test with OCR PDF
        if not os.path.exists(OCR_PDF_PATH):
            print(f"❌ OCR PDF not found: {OCR_PDF_PATH}")
            return False

        print(f"📄 Testing with {OCR_PDF_PATH}")
        pdf_document = fitz.open(OCR_PDF_PATH)
        try:
            # Render at 2x resolution for better OCR; the transform is the
            # same for every page, so build it once.
            zoom = fitz.Matrix(2, 2)
            for page_num in range(len(pdf_document)):
                page_label = page_num + 1
                pix = pdf_document[page_num].get_pixmap(matrix=zoom)

                temp_path = f"temp_page_{page_label}.png"
                try:
                    # Save temporary image for the OCR engine to read
                    with open(temp_path, 'wb') as f:
                        f.write(pix.tobytes("png"))

                    print(f"🔄 Performing OCR on page {page_label}...")
                    start_time = time.time()
                    result = ocr_engine.ocr(temp_path, cls=True)
                    ocr_time = time.time() - start_time

                    if result and result[0]:
                        boxes = result[0]
                        print(f"✅ OCR completed in {ocr_time:.2f} seconds")
                        print(f"📝 Extracted {len(boxes)} text boxes:")
                        for i, line in enumerate(boxes[:5]):  # Show first 5
                            _bbox, (text, confidence) = line
                            print(f" {i+1}. '{text}' (conf: {confidence:.2f})")
                        if len(boxes) > 5:
                            print(f" ... and {len(boxes) - 5} more lines")
                    else:
                        print(f"❌ No text detected on page {page_label}")
                finally:
                    # Remove the temporary image even when OCR raised, so a
                    # failed run does not leave stray PNG files behind.
                    if os.path.exists(temp_path):
                        os.unlink(temp_path)
        finally:
            # Release the PDF handle on both the success and failure paths.
            pdf_document.close()
        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"❌ GPU OCR test failed: {e}")
        return False
def test_server_upload_with_gpu():
    """Upload ``OCR_PDF_PATH`` to the server and wait for it to be processed.

    Steps: log in at ``/login``, delete existing documents, upload the PDF to
    ``/documents/upload``, then poll ``/documents`` and inspect the
    ``statuses`` mapping (``completed`` / ``processing`` / ``failed`` lists)
    in the JSON response.

    Returns:
        bool: True as soon as at least one document is reported completed;
        False on login failure, a missing access token, upload failure, a
        failed document, polling timeout, or any exception.
    """
    print("\n🌐 Testing server upload with GPU OCR...")
    poll_interval = 2  # seconds between status polls
    max_polls = 60     # 60 polls x 2 s sleep: roughly 120 s before giving up

    try:
        # Login
        login_response = requests.post(f'{BASE_URL}/login', data=AUTH_CREDENTIALS, timeout=10)
        if login_response.status_code != 200:
            print(f"❌ Login failed: {login_response.status_code} - {login_response.text}")
            return False
        token = login_response.json().get('access_token')
        if not token:
            # Without this check every later request would be sent with the
            # literal header value "Bearer None".
            print("❌ Login failed: response did not contain an access token")
            return False
        headers = {'Authorization': f'Bearer {token}'}
        print("✅ Login successful")

        # Clear existing documents
        clear_response = requests.delete(f'{BASE_URL}/documents', headers=headers, timeout=10)
        if clear_response.status_code == 200:
            print("✅ Cleared existing documents")
        else:
            # Not fatal, but previously completed documents may still be
            # listed and would satisfy the completion check below.
            print(f"⚠️ Could not clear existing documents: {clear_response.status_code}")

        # Upload OCR PDF
        print(f"📤 Uploading {OCR_PDF_PATH}...")
        with open(OCR_PDF_PATH, 'rb') as f:
            files = {'file': (OCR_PDF_PATH, f, 'application/pdf')}
            upload_response = requests.post(
                f'{BASE_URL}/documents/upload', files=files, headers=headers, timeout=30
            )
        if upload_response.status_code != 200:
            print(f"❌ Upload failed: {upload_response.status_code} - {upload_response.text}")
            return False
        upload_data = upload_response.json()
        print(f"✅ Upload successful: {upload_data}")

        # Monitor processing
        print("🔄 Monitoring OCR processing...")
        for attempt in range(1, max_polls + 1):
            time.sleep(poll_interval)
            docs_response = requests.get(f'{BASE_URL}/documents', headers=headers, timeout=10)
            if docs_response.status_code != 200:
                # Transient status errors are tolerated; try again next poll.
                continue

            statuses = docs_response.json().get('statuses', {})
            failed_docs = statuses.get('failed', [])
            completed = len(statuses.get('completed', []))
            processing = len(statuses.get('processing', []))
            failed = len(failed_docs)
            # The sleep happens before the poll, so the time waited so far
            # is attempt * poll_interval.
            elapsed = attempt * poll_interval
            print(f"⏰ Progress after {elapsed}s: Processing={processing}, Completed={completed}, Failed={failed}")

            # Check for completed documents
            if completed > 0:
                print("🎉 OCR processing completed successfully!")
                return True

            # Check for failed documents
            if failed > 0:
                for doc in failed_docs:
                    print(f"❌ Failed document: {doc.get('file_path')} - {doc.get('error_msg', 'Unknown error')}")
                return False

        print("⏰ Processing timeout - check server logs for details")
        return False
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"❌ Server test failed: {e}")
        return False
def test_search_functionality():
    """Run a fixed set of search queries against ``/api/search``.

    Logs in, then posts each query as JSON and prints how many entries the
    ``results`` list of the response contains.

    Returns:
        bool: True when login succeeds and every search request returns
        HTTP 200 (a query that finds zero results still counts as success);
        False on login failure, a missing access token, any non-200 search
        response, or any exception.
    """
    print("\n🔍 Testing search functionality...")
    try:
        # Login
        login_response = requests.post(f'{BASE_URL}/login', data=AUTH_CREDENTIALS, timeout=10)
        if login_response.status_code != 200:
            print("❌ Login failed for search test")
            return False
        token = login_response.json().get('access_token')
        if not token:
            # Avoid sending "Bearer None" and reporting misleading failures.
            print("❌ Login failed for search test")
            return False
        headers = {'Authorization': f'Bearer {token}'}

        # Test search queries
        test_queries = [
            "safety precautions",
            "minimum safe distance",
            "high voltage",
            "traction voltage",
        ]

        # Every query is attempted even after a failure so the report is
        # complete, but a single failed request fails the whole test.
        all_succeeded = True
        for query in test_queries:
            search_response = requests.post(
                f'{BASE_URL}/api/search', json={'query': query}, headers=headers, timeout=10
            )
            if search_response.status_code == 200:
                search_results = search_response.json()
                print(f"✅ Search for '{query}': Found {len(search_results.get('results', []))} results")
            else:
                print(f"❌ Search for '{query}' failed: {search_response.status_code}")
                all_succeeded = False
        return all_succeeded
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"❌ Search test failed: {e}")
        return False
def main():
    """Run the GPU OCR, server upload and search checks in order.

    The run stops early when the direct GPU OCR test or the upload test
    fails, because the later steps depend on them. Otherwise a summary with
    one pass/fail marker per step is printed, followed by an overall verdict
    that requires all three steps to have passed.
    """
    def status_mark(ok):
        # The original expression yielded an empty string for both outcomes,
        # so the summary never showed which step passed or failed.
        return '✅' if ok else '❌'

    print("🚀 Fixing OCR Processing with GPU Mode")
    print("=" * 50)

    # Step 1: Test GPU OCR directly
    gpu_ok = test_gpu_ocr_directly()
    if not gpu_ok:
        print("❌ GPU OCR test failed - cannot proceed")
        return

    # Step 2: Test server upload with GPU OCR
    upload_ok = test_server_upload_with_gpu()
    if not upload_ok:
        print("❌ Server upload test failed")
        return

    # Step 3: Test search functionality
    search_ok = test_search_functionality()

    # Final results
    print("\n" + "=" * 50)
    print("📊 FINAL RESULTS:")
    print(f" GPU OCR: {status_mark(gpu_ok)}")
    print(f" Upload & Processing: {status_mark(upload_ok)}")
    print(f" Search: {status_mark(search_ok)}")

    # The success message claims search works, so the search result must be
    # part of the condition; gpu_ok and upload_ok are always True here.
    if gpu_ok and upload_ok and search_ok:
        print("\n🎉 SUCCESS: OCR PDF upload, indexing, and search working with GPU mode!")
    else:
        print("\n❌ FAILED: Some tests did not pass")
# Run the full check sequence only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()