"""
Fix OCR Processing with GPU Mode

Ensures GPU-accelerated OCR works for scanned PDF tables
"""

import json
import os
import sys
import time
from pathlib import Path

import requests

# Configure environment for GPU acceleration.
# This must run before `paddle` is imported (the import happens lazily inside
# test_gpu_ocr_directly) so the CUDA toolkit is on PATH when it loads.
os.environ['CUDA_PATH'] = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8'
os.environ['CUDA_HOME'] = os.environ['CUDA_PATH']
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['PATH'] = f"{os.environ['CUDA_PATH']}\\bin;{os.environ['PATH']}"

# Server configuration
BASE_URL = 'http://localhost:3015'
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead of committing them.
AUTH_CREDENTIALS = {'username': 'jleu3482', 'password': 'jleu1212'}
OCR_PDF_PATH = 'ocr.pdf'


def _login():
    """Log in to the server and build the authorization headers.

    Returns:
        A ``(headers, response)`` tuple. ``headers`` is the ``Authorization``
        header dict on success, or ``None`` when the server answered with a
        non-200 status or the JSON body carried no ``access_token``.
        ``response`` is always the raw login response so the caller can
        report details.
    """
    response = requests.post(f'{BASE_URL}/login', data=AUTH_CREDENTIALS, timeout=10)
    if response.status_code != 200:
        return None, response

    token = response.json().get('access_token')
    if not token:
        # Without this guard the requests below would send "Bearer None".
        return None, response

    return {'Authorization': f'Bearer {token}'}, response


def test_gpu_ocr_directly():
    """Run PaddleOCR in GPU mode over every page of ``OCR_PDF_PATH``.

    Each page is rendered at 2x resolution to a temporary PNG in the current
    directory, passed to the OCR engine, and the first five recognized text
    boxes are printed. The temporary image and the PDF handle are released
    even when OCR raises.

    Returns:
        True when all pages were processed without an exception; False when
        the PDF is missing or any step raised.
    """
    print("๐Ÿงช Testing GPU OCR directly...")

    try:
        # Imported lazily so the CUDA environment set at module import is
        # already in place, and so a missing dependency is reported as a
        # test failure instead of crashing the script at import time.
        import paddle
        from paddleocr import PaddleOCR
        import fitz  # PyMuPDF

        print(f"โœ… PaddlePaddle version: {paddle.__version__}")
        print(f"โœ… CUDA available: {paddle.is_compiled_with_cuda()}")
        print(f"โœ… GPU devices: {paddle.device.cuda.device_count()}")

        # Initialize PaddleOCR with GPU
        print("๐Ÿ”„ Initializing PaddleOCR with GPU...")
        ocr_engine = PaddleOCR(use_gpu=True, lang='en', show_log=False)
        print("โœ… PaddleOCR GPU initialization successful")

        # Test with OCR PDF
        if not os.path.exists(OCR_PDF_PATH):
            print(f"โŒ OCR PDF not found: {OCR_PDF_PATH}")
            return False

        print(f"๐Ÿ“„ Testing with {OCR_PDF_PATH}")
        pdf_document = fitz.open(OCR_PDF_PATH)
        try:
            for page_number, page in enumerate(pdf_document, start=1):
                # Convert page to high-resolution image for better OCR
                mat = fitz.Matrix(2, 2)  # 2x resolution
                pix = page.get_pixmap(matrix=mat)
                img_data = pix.tobytes("png")

                temp_path = f"temp_page_{page_number}.png"
                try:
                    # Save temporary image
                    with open(temp_path, 'wb') as f:
                        f.write(img_data)

                    # Perform OCR
                    print(f"๐Ÿ”„ Performing OCR on page {page_number}...")
                    start_time = time.time()
                    result = ocr_engine.ocr(temp_path, cls=True)
                    ocr_time = time.time() - start_time

                    if result and result[0]:
                        page_result = result[0]
                        print(f"โœ… OCR completed in {ocr_time:.2f} seconds")
                        print(f"๐Ÿ“ Extracted {len(page_result)} text boxes:")
                        # Show first 5
                        for i, line in enumerate(page_result[:5], start=1):
                            _bbox, (text, confidence) = line
                            print(f" {i}. '{text}' (conf: {confidence:.2f})")
                        if len(page_result) > 5:
                            print(f" ... and {len(page_result) - 5} more lines")
                    else:
                        print(f"โŒ No text detected on page {page_number}")
                finally:
                    # Clean up the temp image even if OCR raised.
                    if os.path.exists(temp_path):
                        os.unlink(temp_path)
        finally:
            pdf_document.close()

        return True

    except Exception as e:
        # Top-level boundary of this check: report and signal failure.
        print(f"โŒ GPU OCR test failed: {e}")
        return False


def test_server_upload_with_gpu():
    """Upload ``OCR_PDF_PATH`` to the server and wait for processing to finish.

    Logs in, deletes the existing documents, uploads the PDF, then polls
    ``/documents`` every 2 seconds for up to 60 polls (about 120 seconds of
    waiting) until a document is reported as completed or failed.

    Returns:
        True when at least one document is reported completed; False on login
        or upload failure, on a failed document, on timeout, or on any
        exception.
    """
    print("\n๐ŸŒ Testing server upload with GPU OCR...")

    poll_interval = 2  # seconds between status checks
    max_polls = 60  # 60 polls x 2 s = up to ~120 s of waiting

    try:
        # Login
        headers, login_response = _login()
        if headers is None:
            print(f"โŒ Login failed: {login_response.status_code} - {login_response.text}")
            return False
        print("โœ… Login successful")

        # Clear existing documents
        clear_response = requests.delete(f'{BASE_URL}/documents', headers=headers, timeout=10)
        if clear_response.status_code == 200:
            print("โœ… Cleared existing documents")

        # Upload OCR PDF
        print(f"๐Ÿ“ค Uploading {OCR_PDF_PATH}...")
        with open(OCR_PDF_PATH, 'rb') as f:
            files = {'file': (OCR_PDF_PATH, f, 'application/pdf')}
            upload_response = requests.post(
                f'{BASE_URL}/documents/upload',
                files=files,
                headers=headers,
                timeout=30,
            )

        if upload_response.status_code != 200:
            print(f"โŒ Upload failed: {upload_response.status_code} - {upload_response.text}")
            return False

        upload_data = upload_response.json()
        print(f"โœ… Upload successful: {upload_data}")

        # Monitor processing
        print("๐Ÿ”„ Monitoring OCR processing...")
        for attempt in range(1, max_polls + 1):
            time.sleep(poll_interval)
            # Time actually spent sleeping so far (excludes request latency).
            elapsed = attempt * poll_interval

            docs_response = requests.get(f'{BASE_URL}/documents', headers=headers, timeout=10)
            if docs_response.status_code != 200:
                # Transient non-200 answers are ignored; keep polling.
                continue

            statuses = docs_response.json().get('statuses', {})
            failed_docs = statuses.get('failed', [])
            completed = len(statuses.get('completed', []))
            processing = len(statuses.get('processing', []))
            failed = len(failed_docs)

            print(f"โฐ Progress after {elapsed}s: Processing={processing}, Completed={completed}, Failed={failed}")

            # Check for completed documents
            if completed > 0:
                print("๐ŸŽ‰ OCR processing completed successfully!")
                return True

            # Check for failed documents
            if failed > 0:
                for doc in failed_docs:
                    print(f"โŒ Failed document: {doc.get('file_path')} - {doc.get('error_msg', 'Unknown error')}")
                return False

        print("โฐ Processing timeout - check server logs for details")
        return False

    except Exception as e:
        # Top-level boundary of this check: report and signal failure.
        print(f"โŒ Server test failed: {e}")
        return False


def test_search_functionality():
    """Run a fixed set of search queries against ``/api/search``.

    Returns:
        True only when login succeeded and every query returned HTTP 200;
        False when login failed, any query returned a non-200 status, or an
        exception was raised.
    """
    print("\n๐Ÿ” Testing search functionality...")

    try:
        # Login
        headers, _login_response = _login()
        if headers is None:
            print("โŒ Login failed for search test")
            return False

        # Test search queries
        test_queries = [
            "safety precautions",
            "minimum safe distance",
            "high voltage",
            "traction voltage",
        ]

        all_ok = True
        for query in test_queries:
            search_response = requests.post(
                f'{BASE_URL}/api/search',
                json={'query': query},
                headers=headers,
                timeout=10,
            )
            if search_response.status_code == 200:
                search_results = search_response.json()
                print(f"โœ… Search for '{query}': Found {len(search_results.get('results', []))} results")
            else:
                print(f"โŒ Search for '{query}' failed: {search_response.status_code}")
                # Remember the failure but keep going so every query is reported.
                all_ok = False

        return all_ok

    except Exception as e:
        # Top-level boundary of this check: report and signal failure.
        print(f"โŒ Search test failed: {e}")
        return False


def main():
    """Run the GPU OCR, upload, and search checks in order and print a summary.

    The run stops early when the GPU OCR check or the upload check fails,
    because the later steps depend on them.
    """
    print("๐Ÿš€ Fixing OCR Processing with GPU Mode")
    print("=" * 50)

    # Step 1: Test GPU OCR directly
    gpu_ok = test_gpu_ocr_directly()
    if not gpu_ok:
        print("โŒ GPU OCR test failed - cannot proceed")
        return

    # Step 2: Test server upload with GPU OCR
    upload_ok = test_server_upload_with_gpu()
    if not upload_ok:
        print("โŒ Server upload test failed")
        return

    # Step 3: Test search functionality
    search_ok = test_search_functionality()

    # Final results
    print("\n" + "=" * 50)
    print("๐Ÿ“Š FINAL RESULTS:")
    print(f" GPU OCR: {'โœ…' if gpu_ok else 'โŒ'}")
    print(f" Upload & Processing: {'โœ…' if upload_ok else 'โŒ'}")
    print(f" Search: {'โœ…' if search_ok else 'โŒ'}")

    # The success message claims search works, so search_ok must count too.
    if gpu_ok and upload_ok and search_ok:
        print("\n๐ŸŽ‰ SUCCESS: OCR PDF upload, indexing, and search working with GPU mode!")
    else:
        print("\nโŒ FAILED: Some tests did not pass")


if __name__ == "__main__":
    main()