import os

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Horizontal position (in points) shared by every line of text on the page.
_LEFT_MARGIN = 100

# Page content grouped by font: (font name, font size, [(y position, text), ...]).
# Sections are drawn in order, so the fonts and sizes vary down the page.
_TEXT_SECTIONS = (
    (
        "Helvetica",
        12,
        (
            (750, "This is a test PDF document for OCR processing."),
            (730, "It contains sample text that should be extracted by PaddleOCR."),
            (710, "Using GPU acceleration for faster processing."),
            (690, "Page 1 of the test document."),
        ),
    ),
    (
        "Helvetica-Bold",
        14,
        (
            (650, "Important Heading"),
        ),
    ),
    (
        "Helvetica",
        10,
        (
            (630, "This is smaller text to test OCR accuracy."),
            (610, "Numbers: 1234567890"),
            (590, "Special characters: !@#$%^&*()"),
        ),
    ),
)


def create_test_ocr_pdf(pdf_path='test_ocr_content.pdf'):
    """Write a one-page, letter-sized PDF of sample text for OCR testing.

    The page contains several lines of text in different fonts and sizes,
    including digits and punctuation.

    Args:
        pdf_path: Destination file (``str`` or ``os.PathLike``). Defaults to
            ``'test_ocr_content.pdf'`` in the current working directory.

    Returns:
        The path of the PDF that was written.
    """
    # Normalise path-like objects up front so the canvas and the return value
    # both see a plain path.
    pdf_path = os.fspath(pdf_path)

    c = canvas.Canvas(pdf_path, pagesize=letter)

    for font_name, font_size, lines in _TEXT_SECTIONS:
        c.setFont(font_name, font_size)
        for y, text in lines:
            c.drawString(_LEFT_MARGIN, y, text)

    # save() finalises the document and writes it to pdf_path.
    c.save()

    print(f"✅ Created test PDF: {pdf_path}")
    return pdf_path


if __name__ == "__main__":
    create_test_ocr_pdf()