import os

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Layout used when drawing the sample text onto the page.
TEXT_X = 100        # x coordinate passed to drawString for every line
TEXT_START_Y = 700  # y coordinate of the first line
LINE_SPACING = 20   # amount subtracted from y after each line

# Sample sentences mentioning people, organizations, and locations.
SAMPLE_TEXT_LINES = (
    "Test Document for OCR Processing",
    "This document contains various entities for testing.",
    "John Smith works at Microsoft Corporation in Seattle.",
    "The company was founded by Bill Gates and Paul Allen.",
    "Microsoft develops software products like Windows and Office.",
    "The headquarters is located in Redmond, Washington.",
    "This document was created on October 28, 2025.",
    "It contains names of people, organizations, and locations.",
    "These should be extracted as entities by the system.",
)


def create_test_pdf(filename: str = "test_meaningful.pdf") -> str:
    """Write a one-page, letter-sized PDF containing the sample text.

    Each entry of ``SAMPLE_TEXT_LINES`` is drawn on its own line, starting
    at ``TEXT_START_Y`` and stepping by ``LINE_SPACING``. After saving, the
    file name and its size on disk are printed.

    Args:
        filename: Path of the PDF to create. Defaults to
            ``"test_meaningful.pdf"`` (the name this script always used).

    Returns:
        The ``filename`` that was written.
    """
    pdf = canvas.Canvas(filename, pagesize=letter)

    # Draw the lines one below another.
    for index, line in enumerate(SAMPLE_TEXT_LINES):
        pdf.drawString(TEXT_X, TEXT_START_Y - index * LINE_SPACING, line)

    pdf.save()

    print(f"Created test PDF: {filename}")
    print(f"File size: {os.path.getsize(filename)} bytes")
    return filename


if __name__ == "__main__":
    create_test_pdf()