"""Smoke test for GPU-accelerated PaddleOCR on ``ocr.pdf``.

The script runs three stages in order and stops at the first failure:

1. Build a process environment that points at a CUDA 11.8 installation.
2. Run PaddleOCR directly on every page of ``ocr.pdf``.
3. Start a ``lightrag-server`` subprocess, upload ``ocr.pdf`` to it and poll
   the document list until the file is reported as completed or failed.
"""
import io
import os
import subprocess
import sys
import time

import fitz  # PyMuPDF
import numpy as np
import requests
from PIL import Image

# Location of the CUDA toolkit this script expects (Windows layout).
CUDA_PATH = r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8'

# Document under test. It is opened relative to the current working directory
# and the same string is compared against the ``file_path`` the server reports.
PDF_PATH = 'ocr.pdf'

SERVER_PORT = 3015
BASE_URL = f'http://localhost:{SERVER_PORT}'


def setup_cuda_environment():
    """Build an environment mapping configured for CUDA 11.8.

    Returns:
        A copy of ``os.environ`` with the CUDA variables set, the CUDA ``bin``
        directory placed first on ``PATH`` and UTF-8 encoding variables added,
        or ``None`` when the CUDA directory does not exist.
    """
    print("=== SETTING UP CUDA 11.8 ENVIRONMENT ===")

    cuda_path = CUDA_PATH
    if not os.path.exists(cuda_path):
        print(f"✗ CUDA 11.8 not found at: {cuda_path}")
        return None

    print(f"✓ CUDA 11.8 found at: {cuda_path}")

    # Work on a copy so the current process environment is left untouched.
    env = os.environ.copy()
    env['CUDA_PATH'] = cuda_path
    env['CUDA_HOME'] = cuda_path
    env['CUDA_VISIBLE_DEVICES'] = '0'
    env['LIGHTRAG_OCR_ENGINE'] = 'paddleocr'

    # Drop every existing PATH entry that mentions CUDA or NVIDIA so that the
    # 11.8 bin directory added below cannot be shadowed by another install.
    cuda_bin = os.path.join(cuda_path, 'bin')
    kept_entries = [
        entry
        for entry in env.get('PATH', '').split(os.pathsep)
        if 'CUDA' not in entry and 'NVIDIA' not in entry
    ]
    env['PATH'] = cuda_bin + os.pathsep + os.pathsep.join(kept_entries)

    # Encoding variables for the child process.
    env['PYTHONIOENCODING'] = 'utf-8'
    env['LANG'] = 'en_US.UTF-8'
    env['LC_ALL'] = 'en_US.UTF-8'

    print("✓ Environment configured for CUDA 11.8")
    print(f"✓ PATH includes CUDA bin: {cuda_bin}")

    # Informational only: a missing cuDNN DLL is reported but does not fail.
    cudnn_dll = os.path.join(cuda_bin, 'cudnn_ops_infer64_8.dll')
    if os.path.exists(cudnn_dll):
        print(f"✓ cuDNN DLL found: {cudnn_dll}")
    else:
        print(f"✗ cuDNN DLL not found: {cudnn_dll}")

    return env


def test_paddleocr_gpu_direct():
    """Run PaddleOCR on every page of ``ocr.pdf`` and report the text found.

    Each page is rendered to a PNG with PyMuPDF, decoded with PIL and handed
    to PaddleOCR as a numpy array.

    Returns:
        ``True`` when text was extracted from at least one page, ``False``
        when nothing was extracted or any error occurred (the traceback is
        printed in that case).
    """
    print("\n=== TESTING PADDLEOCR GPU DIRECTLY ON OCR.PDF ===")

    try:
        # Imported lazily so a missing/broken Paddle install is reported as a
        # test failure instead of preventing the script from loading.
        import paddle
        from paddleocr import PaddleOCR

        print(f"✓ PaddlePaddle version: {paddle.__version__}")
        print(f"✓ GPU available: {paddle.is_compiled_with_cuda()}")

        if paddle.is_compiled_with_cuda():
            paddle.device.set_device('gpu')
            print("✓ Using GPU for PaddleOCR")

        # Method 1: Convert PDF to images first, then run OCR
        print("\n--- Method 1: Converting PDF to images first ---")

        pdf_document = fitz.open(PDF_PATH)
        all_text = []
        try:
            print(f"✓ PDF opened successfully, {pdf_document.page_count} pages")

            # Build the engine once; constructing it per page repeats the
            # model initialisation for no benefit.
            ocr = PaddleOCR(use_angle_cls=False, lang='en', use_gpu=True)

            for page_num in range(pdf_document.page_count):
                page = pdf_document.load_page(page_num)
                img_data = page.get_pixmap().tobytes("png")

                # PNG bytes -> PIL image -> numpy array for PaddleOCR.
                image_np = np.array(Image.open(io.BytesIO(img_data)))

                result = ocr.ocr(image_np, cls=False)

                if result and result[0]:
                    fragments = []
                    for line in result[0]:
                        # Each entry carries (text, confidence) at index 1.
                        text, confidence = line[1][0], line[1][1]
                        fragments.append(f"{text}")
                        print(f"  Page {page_num+1}: '{text}' (confidence: {confidence:.3f})")
                    all_text.append(" ".join(fragments).strip())
                else:
                    print(f"  Page {page_num+1}: No text detected")
        finally:
            # Release the document even when rendering or OCR raises.
            pdf_document.close()

        if all_text:
            print(f"\n✓ Successfully extracted text from {len(all_text)} pages")
            full_text = " ".join(all_text)
            print(f"Total text length: {len(full_text)} characters")
            print(f"Text preview: {full_text[:200]}...")
            return True

        print("✗ No text extracted from PDF")
        return False

    except Exception as e:
        # Top-level boundary of this test stage: report and fail.
        print(f"✗ Error in direct PaddleOCR test: {e}")
        import traceback
        traceback.print_exc()
        return False


def _stop_process(process, timeout=10):
    """Terminate *process*, killing it if it does not exit within *timeout* seconds."""
    process.terminate()
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()  # reap the child so it does not linger


def start_lightrag_server_with_ocr_fix(env):
    """Start ``lightrag-server`` and wait until it answers HTTP requests.

    Args:
        env: Environment mapping passed to the server subprocess.

    Returns:
        The running ``subprocess.Popen`` object once ``GET /`` returns 200,
        or ``None`` when the process could not be launched, exited early, or
        did not respond within 60 polling attempts. On failure the child
        process is stopped before returning.
    """
    print("\n=== STARTING LIGHTRAG SERVER WITH OCR FIX ===")

    cmd = [
        'lightrag-server',
        '--port', str(SERVER_PORT),
        '--embedding-binding', 'ollama',
        '--rerank-binding', 'null',
        '--host', '0.0.0.0',
    ]
    print(f"Starting server: {' '.join(cmd)}")

    try:
        # The server output is never read by this script. Leaving it attached
        # to unread pipes can block the server once a pipe buffer fills, so
        # it is discarded instead.
        process = subprocess.Popen(
            cmd,
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError) as e:
        print(f"✗ Failed to start server: {e}")
        return None

    print("Waiting for server to start...")
    for _ in range(60):
        # No point polling HTTP if the process has already died.
        if process.poll() is not None:
            print(f"✗ Failed to start server: process exited with code {process.returncode}")
            return None

        try:
            response = requests.get(f'{BASE_URL}/', timeout=5)
            if response.status_code == 200:
                print("✓ Server started successfully!")
                return process
        except requests.exceptions.RequestException:
            # Not listening yet; retry after a short pause.
            pass
        time.sleep(1)

    print("✗ Server failed to start within timeout")
    _stop_process(process)  # do not leave an orphaned server behind
    return None


def _wait_for_processing(headers, max_wait=300, poll_interval=10, report_interval=30):
    """Poll the document list until ``ocr.pdf`` is completed, failed or timed out.

    Returns ``True`` when the file shows up under ``completed`` and ``False``
    when it shows up under ``failed`` or *max_wait* seconds elapse.
    """
    start_time = time.time()
    last_report = None

    while time.time() - start_time < max_wait:
        try:
            docs_response = requests.get(f'{BASE_URL}/documents', headers=headers, timeout=30)

            if docs_response.status_code == 200:
                statuses = docs_response.json().get('statuses', {})
                completed = statuses.get('completed', [])
                processing = statuses.get('processing', [])
                failed = statuses.get('failed', [])

                elapsed = int(time.time() - start_time)

                for doc in completed:
                    if doc.get('file_path') == PDF_PATH:
                        print(f"\n🎉 OCR PROCESSING COMPLETED in {elapsed} seconds!")
                        print(f"   File: {doc.get('file_path')}")
                        print(f"   Size: {doc.get('file_size')}")
                        print(f"   Chunks: {doc.get('chunk_count')}")
                        return True

                for doc in failed:
                    if doc.get('file_path') == PDF_PATH:
                        print(f"✗ OCR processing failed: {doc.get('error_msg', 'Unknown error')}")
                        return False

                # Report on a time basis: request latency shifts the polling
                # cadence, so an exact "elapsed % 30 == 0" test stops firing.
                if last_report is None or elapsed - last_report >= report_interval:
                    print(f"   Still processing... ({elapsed}s elapsed, {len(processing)} files processing)")
                    last_report = elapsed

            time.sleep(poll_interval)

        except requests.exceptions.RequestException as e:
            print(f"   Connection error: {e}")
            time.sleep(poll_interval)

    print(f"✗ OCR processing timed out after {max_wait} seconds")
    return False


def test_ocr_upload_workflow():
    """Log in, clear existing documents, upload ``ocr.pdf`` and wait for OCR.

    Returns:
        ``True`` when the server reports the uploaded file as completed,
        ``False`` on login/upload failure, processing failure, timeout or any
        unexpected error.
    """
    print("\n=== TESTING OCR UPLOAD WORKFLOW ===")

    base_url = BASE_URL

    try:
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        login_data = {'username': 'jleu3482', 'password': 'jleu1212'}
        login_response = requests.post(f'{base_url}/login', data=login_data, timeout=30)

        if login_response.status_code != 200:
            print(f"✗ Login failed: {login_response.text}")
            return False

        token = login_response.json().get('access_token')
        if not token:
            # Without a token every later request would send "Bearer None".
            print(f"✗ Login failed: no access token in response: {login_response.text}")
            return False
        headers = {'Authorization': f'Bearer {token}'}
        print("✓ Login successful")

        # Start from a clean document store.
        print("Clearing existing documents...")
        clear_response = requests.delete(f'{base_url}/documents', headers=headers, timeout=30)
        print(f"Clear status: {clear_response.status_code}")

        print("\n=== UPLOADING OCR.PDF ===")
        print(f"File: {PDF_PATH} ({os.path.getsize(PDF_PATH)} bytes)")

        with open(PDF_PATH, 'rb') as f:
            files = {'file': (PDF_PATH, f, 'application/pdf')}
            upload_response = requests.post(
                f'{base_url}/documents/upload',
                files=files,
                headers=headers,
                timeout=60,
            )

        print(f"Upload status: {upload_response.status_code}")
        if upload_response.status_code != 200:
            print(f"✗ Upload failed: {upload_response.text}")
            return False

        upload_data = upload_response.json()
        print(f"Upload response: {upload_data}")

        if not upload_data.get('track_id'):
            print("✗ No track ID returned")
            return False

        print("\n=== MONITORING OCR PROCESSING ===")
        print("OCR processing with GPU acceleration...")

        return _wait_for_processing(headers, max_wait=300)  # 5 minutes

    except Exception as e:
        # Top-level boundary of this test stage: report and fail.
        print(f"✗ Error during OCR workflow test: {e}")
        return False


def main():
    """Run the CUDA setup, the direct OCR test and the server upload workflow."""
    print("OCR PDF TEST WITH GPU PADDLEOCR")
    print("=" * 50)
    print("Testing: Direct OCR → Server Upload → Processing")
    print("CUDA 11.8: Enabled")
    print("Document: ocr.pdf")
    print("=" * 50)

    # Step 1: Setup CUDA environment
    env = setup_cuda_environment()
    if not env:
        print("\n❌ CUDA setup failed")
        return

    # Step 2: Test PaddleOCR GPU directly on ocr.pdf
    if not test_paddleocr_gpu_direct():
        print("\n❌ Direct PaddleOCR test failed")
        return

    # Step 3: Start server
    server_process = start_lightrag_server_with_ocr_fix(env)
    if not server_process:
        print("\n❌ Failed to start server")
        return

    try:
        # Step 4: Test complete upload workflow
        if test_ocr_upload_workflow():
            print("\n" + "=" * 50)
            print("🎉 SUCCESS: OCR PDF WORKFLOW COMPLETED!")
            print("=" * 50)
            print("The ocr.pdf document has been:")
            print("✓ Successfully processed with GPU-accelerated OCR")
            print("✓ Uploaded to the LightRAG server")
            print("✓ Indexed and made searchable")
            print("\nYou can now access the web UI at: http://localhost:3015")
        else:
            print("\n❌ OCR workflow failed")
    finally:
        # Always shut the server down, whatever the workflow outcome.
        print("\nStopping server...")
        _stop_process(server_process)
        print("Test completed.")


if __name__ == "__main__":
    main()