"""Run PaddleOCR on a PDF file and print the recognized text page by page."""
import os
import sys

# PDF that is processed when the script is run without an explicit path.
PDF_PATH = 'ocr.pdf'


def format_result(result) -> list:
    """Return the printable report lines for an OCR ``result``.

    Args:
        result: Sequence of pages as returned by ``ocr.ocr``; each page is a
            sequence of detected lines. The whole result, or any single
            page, may be ``None`` or empty.

    Returns:
        A list of strings, one per output line, in print order.
    """
    if not result:
        return ["No result returned"]

    report = []
    for page_num, page in enumerate(result, start=1):
        report.append(f"Page {page_num}:")
        # Truthiness covers both an empty page and a ``None`` page.
        if not page:
            report.append(" No text detected")
            continue
        for line_num, line in enumerate(page, start=1):
            report.append(f" Line {line_num}: {line}")
    return report


def has_text(result) -> bool:
    """Return True when at least one page of ``result`` has a detected line.

    A page entry may be ``None`` instead of an empty list (PaddleOCR appears
    to do this for pages without detections), so pages are tested by
    truthiness; calling ``len()`` on such an entry would raise ``TypeError``.
    """
    if not result:
        return False
    return any(page for page in result)


def main(pdf_path: str = PDF_PATH) -> int:
    """Run OCR on ``pdf_path`` and print the outcome.

    Args:
        pdf_path: Path of the PDF to process.

    Returns:
        Process exit status: 0 when OCR ran, 1 when the file is missing.
    """
    # Check the input first so a missing file fails before the (slow)
    # OCR engine is constructed.
    if not os.path.exists(pdf_path):
        print(f"{pdf_path} not found")
        return 1

    # Imported here so the helpers above stay importable on machines
    # without paddleocr installed.
    from paddleocr import PaddleOCR

    # Initialize PaddleOCR with CPU to avoid GPU conflict with server
    ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)

    print(f"Running PaddleOCR on {pdf_path}...")
    result = ocr.ocr(pdf_path, cls=True)

    for text in format_result(result):
        print(text)

    # If no text found, print a message
    if not has_text(result):
        print("No text found in the PDF by PaddleOCR")
    return 0


if __name__ == "__main__":
    # sys.exit instead of the site-provided exit(), which is intended for
    # interactive sessions and is not always available.
    sys.exit(main())