31 lines
868 B
Python
31 lines
868 B
Python
# Run PaddleOCR on a local PDF and print the recognition result page by page.
import os
import sys

from paddleocr import PaddleOCR

# Initialize PaddleOCR with CPU to avoid GPU conflict with server
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)

# The PDF is looked up relative to the current working directory.
pdf_path = 'ocr.pdf'
if not os.path.exists(pdf_path):
    print(f"{pdf_path} not found")
    # sys.exit() is always available; the bare exit() helper is injected by
    # the site module and is missing when the interpreter runs without it.
    sys.exit(1)

print(f"Running PaddleOCR on {pdf_path}...")

# Run OCR on the PDF
result = ocr.ocr(pdf_path, cls=True)

# Print the result: one "Page N:" header per entry, then one line per item.
if result:
    for page_num, page in enumerate(result, start=1):
        print(f"Page {page_num}:")
        if not page:
            # A falsy page entry (empty list or None) means nothing was found.
            print(" No text detected")
            continue
        for line_num, line in enumerate(page, start=1):
            print(f" Line {line_num}: {line}")
else:
    print("No result returned")

# If no text found on any page, print a summary message.
# Pages are tested for truthiness instead of with len(): the loop above already
# treats a falsy page as "no text", and len(None) would raise TypeError here.
# NOTE(review): PaddleOCR 2.x appears to return None for a page with no
# detections -- confirm against the installed version.
if not result or all(not page for page in result):
    print("No text found in the PDF by PaddleOCR")