"""
Fix OCR Processing with GPU Mode

Ensures GPU-accelerated OCR works for scanned PDF tables.
"""
|
|
|
|
import os
|
|
import sys
|
|
import requests
|
|
import time
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# Configure environment for GPU acceleration.
# These must be set before paddle is imported (the OCR test imports it lazily).
# NOTE: the toolkit location is a hard-coded Windows path for CUDA v11.8 and
# the PATH entry below uses the Windows ';' separator.
os.environ['CUDA_PATH'] = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8'
os.environ['CUDA_HOME'] = os.environ['CUDA_PATH']
# Expose only the first GPU to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Prepend the CUDA bin directory so its DLLs are found first; .get() keeps this
# from raising KeyError in the unusual case that PATH is not set at all.
os.environ['PATH'] = f"{os.environ['CUDA_PATH']}\\bin;{os.environ.get('PATH', '')}"
|
|
|
|
# Server configuration.
# Each value can be overridden through an environment variable; the defaults
# are the values this script has always used, so existing runs are unaffected.
BASE_URL = os.environ.get('OCR_TEST_BASE_URL', 'http://localhost:3015')
# NOTE(security): the fallback credentials are hard-coded in source. Prefer
# supplying OCR_TEST_USERNAME / OCR_TEST_PASSWORD from the environment.
AUTH_CREDENTIALS = {
    'username': os.environ.get('OCR_TEST_USERNAME', 'jleu3482'),
    'password': os.environ.get('OCR_TEST_PASSWORD', 'jleu1212'),
}
# Scanned PDF used by both the direct OCR test and the upload test.
OCR_PDF_PATH = os.environ.get('OCR_TEST_PDF_PATH', 'ocr.pdf')
|
|
|
|
def test_gpu_ocr_directly():
    """Run PaddleOCR in GPU mode over every page of ``OCR_PDF_PATH``.

    Each page is rendered to a PNG at 2x scale, written to a temporary file in
    the current working directory, passed to PaddleOCR, and removed again.
    For every page the elapsed time and the first five recognised lines are
    printed.

    Returns:
        bool: True when all pages were processed without an exception (a page
        with no detected text is reported but does not fail the test). False
        when the PDF does not exist or when anything raises, including a
        missing paddle / paddleocr / PyMuPDF installation.
    """
    print("🧪 Testing GPU OCR directly...")

    try:
        # Imported lazily so that a missing OCR stack is reported as a failed
        # test instead of crashing the whole script at import time.
        import paddle
        from paddleocr import PaddleOCR
        import fitz  # PyMuPDF

        print(f"✅ PaddlePaddle version: {paddle.__version__}")
        print(f"✅ CUDA available: {paddle.is_compiled_with_cuda()}")
        print(f"✅ GPU devices: {paddle.device.cuda.device_count()}")

        # Initialize PaddleOCR with GPU
        print("🔄 Initializing PaddleOCR with GPU...")
        ocr_engine = PaddleOCR(use_gpu=True, lang='en', show_log=False)
        print("✅ PaddleOCR GPU initialization successful")

        # Test with OCR PDF
        if not os.path.exists(OCR_PDF_PATH):
            print(f"❌ OCR PDF not found: {OCR_PDF_PATH}")
            return False

        print(f"📄 Testing with {OCR_PDF_PATH}")
        pdf_document = fitz.open(OCR_PDF_PATH)
        try:
            # 2x zoom on both axes gives a higher-resolution image for OCR;
            # the matrix is the same for every page, so build it once.
            zoom = fitz.Matrix(2, 2)

            for page_number, page in enumerate(pdf_document, start=1):
                img_data = page.get_pixmap(matrix=zoom).tobytes("png")

                # PaddleOCR is given a file path, so stage the page on disk.
                temp_path = f"temp_page_{page_number}.png"
                try:
                    with open(temp_path, 'wb') as f:
                        f.write(img_data)

                    # Perform OCR
                    print(f"🔄 Performing OCR on page {page_number}...")
                    start_time = time.time()
                    result = ocr_engine.ocr(temp_path, cls=True)
                    ocr_time = time.time() - start_time

                    # result[0] holds the detections for the single image
                    # passed in; it is empty/None when nothing was found.
                    if result and result[0]:
                        lines = result[0]
                        print(f"✅ OCR completed in {ocr_time:.2f} seconds")
                        print(f"📝 Extracted {len(lines)} text boxes:")

                        for i, line in enumerate(lines[:5]):  # Show first 5
                            _bbox, (text, confidence) = line
                            print(f" {i+1}. '{text}' (conf: {confidence:.2f})")

                        if len(lines) > 5:
                            print(f" ... and {len(lines) - 5} more lines")
                    else:
                        print(f"❌ No text detected on page {page_number}")
                finally:
                    # Always remove the staged image, even when OCR raised,
                    # so failed runs do not litter the working directory.
                    if os.path.exists(temp_path):
                        os.unlink(temp_path)
        finally:
            # Release the PDF handle on both the success and the error path.
            pdf_document.close()

        return True

    except Exception as e:
        # Top-level boundary of this check: report any failure as False.
        print(f"❌ GPU OCR test failed: {e}")
        return False
|
|
|
|
def test_server_upload_with_gpu():
    """Upload ``OCR_PDF_PATH`` to the server and wait for it to be processed.

    Steps: log in with ``AUTH_CREDENTIALS``, delete existing documents,
    upload the PDF, then poll ``/documents`` until a document is reported as
    completed or failed.

    Returns:
        bool: True as soon as at least one document is reported completed.
        False on login/upload failure, on a reported processing failure, on
        timeout, or when any exception is raised.
    """
    print("\n🌐 Testing server upload with GPU OCR...")

    poll_interval = 2  # seconds slept before each status poll
    max_polls = 60     # 60 polls x 2 s = about 120 s of waiting in total

    try:
        # Login
        login_response = requests.post(f'{BASE_URL}/login', data=AUTH_CREDENTIALS, timeout=10)
        if login_response.status_code != 200:
            print(f"❌ Login failed: {login_response.status_code} - {login_response.text}")
            return False

        token = login_response.json().get('access_token')
        if not token:
            # Without this check every later request would be sent with the
            # literal header "Bearer None" and fail in a confusing way.
            print("❌ Login failed: response did not contain an access token")
            return False
        headers = {'Authorization': f'Bearer {token}'}
        print("✅ Login successful")

        # Clear existing documents
        clear_response = requests.delete(f'{BASE_URL}/documents', headers=headers, timeout=10)
        if clear_response.status_code == 200:
            print("✅ Cleared existing documents")
        else:
            # Left-over completed documents would satisfy the success check
            # below, so make a failed clear visible.
            print(f"⚠️ Could not clear existing documents: {clear_response.status_code}")

        # Upload OCR PDF
        print(f"📤 Uploading {OCR_PDF_PATH}...")
        with open(OCR_PDF_PATH, 'rb') as f:
            files = {'file': (OCR_PDF_PATH, f, 'application/pdf')}
            upload_response = requests.post(f'{BASE_URL}/documents/upload', files=files, headers=headers, timeout=30)

        if upload_response.status_code != 200:
            print(f"❌ Upload failed: {upload_response.status_code} - {upload_response.text}")
            return False

        upload_data = upload_response.json()
        print(f"✅ Upload successful: {upload_data}")

        # Monitor processing
        print("🔄 Monitoring OCR processing...")
        for attempt in range(1, max_polls + 1):
            time.sleep(poll_interval)
            # Time actually slept so far (the sleep happens before the poll).
            elapsed = attempt * poll_interval

            docs_response = requests.get(f'{BASE_URL}/documents', headers=headers, timeout=10)
            if docs_response.status_code != 200:
                print(f"⚠️ Status check after {elapsed}s returned {docs_response.status_code}")
                continue

            docs_data = docs_response.json()
            statuses = docs_data.get('statuses', {})

            completed = len(statuses.get('completed', []))
            processing = len(statuses.get('processing', []))
            failed = len(statuses.get('failed', []))

            print(f"⏰ Progress after {elapsed}s: Processing={processing}, Completed={completed}, Failed={failed}")

            # Check for completed documents
            if completed > 0:
                print("🎉 OCR processing completed successfully!")
                return True

            # Check for failed documents
            if failed > 0:
                for doc in statuses.get('failed', []):
                    print(f"❌ Failed document: {doc.get('file_path')} - {doc.get('error_msg', 'Unknown error')}")
                return False

        print("⏰ Processing timeout - check server logs for details")
        return False

    except Exception as e:
        # Top-level boundary of this check: report any failure as False.
        print(f"❌ Server test failed: {e}")
        return False
|
|
|
|
def test_search_functionality():
    """Run a fixed set of search queries against the server.

    Logs in with ``AUTH_CREDENTIALS`` and POSTs each query to
    ``/api/search``, printing the number of results or the failing status.

    Returns:
        bool: True only when login succeeded and every query returned HTTP
        200 (a query with zero results still counts as success). False if
        login fails, any query returns a non-200 status, or an exception is
        raised.
    """
    print("\n🔍 Testing search functionality...")

    try:
        # Login
        login_response = requests.post(f'{BASE_URL}/login', data=AUTH_CREDENTIALS, timeout=10)
        if login_response.status_code != 200:
            print("❌ Login failed for search test")
            return False

        token = login_response.json().get('access_token')
        if not token:
            # Avoid sending the literal header "Bearer None".
            print("❌ Login failed for search test")
            return False
        headers = {'Authorization': f'Bearer {token}'}

        # Test search queries
        test_queries = [
            "safety precautions",
            "minimum safe distance",
            "high voltage",
            "traction voltage",
        ]

        # Keep going after a failure so every query is reported, but remember
        # that one failed so the overall result reflects it.
        all_ok = True
        for query in test_queries:
            search_data = {'query': query}
            search_response = requests.post(f'{BASE_URL}/api/search', json=search_data, headers=headers, timeout=10)

            if search_response.status_code == 200:
                search_results = search_response.json()
                print(f"✅ Search for '{query}': Found {len(search_results.get('results', []))} results")
            else:
                print(f"❌ Search for '{query}' failed: {search_response.status_code}")
                all_ok = False

        return all_ok

    except Exception as e:
        # Top-level boundary of this check: report any failure as False.
        print(f"❌ Search test failed: {e}")
        return False
|
|
|
|
def main():
    """Run the GPU OCR, upload, and search checks in order and print a summary.

    The GPU check and the upload check are prerequisites: if either fails the
    function prints why and returns before running the later steps. The
    search check always runs to completion and is included in the final
    verdict.
    """
    print("🚀 Fixing OCR Processing with GPU Mode")
    print("=" * 50)

    # Step 1: Test GPU OCR directly
    gpu_ok = test_gpu_ocr_directly()
    if not gpu_ok:
        print("❌ GPU OCR test failed - cannot proceed")
        return

    # Step 2: Test server upload with GPU OCR
    upload_ok = test_server_upload_with_gpu()
    if not upload_ok:
        print("❌ Server upload test failed")
        return

    # Step 3: Test search functionality
    search_ok = test_search_functionality()

    # Final results
    print("\n" + "=" * 50)
    print("📊 FINAL RESULTS:")
    print(f" GPU OCR: {'✅' if gpu_ok else '❌'}")
    print(f" Upload & Processing: {'✅' if upload_ok else '❌'}")
    print(f" Search: {'✅' if search_ok else '❌'}")

    # search_ok must be part of the verdict: gpu_ok and upload_ok are always
    # True here (earlier failures return above), and the success message
    # explicitly claims that search works.
    if gpu_ok and upload_ok and search_ok:
        print("\n🎉 SUCCESS: OCR PDF upload, indexing, and search working with GPU mode!")
    else:
        print("\n❌ FAILED: Some tests did not pass")
|
|
|
|
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()