# File: railseek6/create_test_pdf.py
#
# (File-viewer metadata: 35 lines, 1.2 KiB, Python)

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import os
def create_test_pdf(filename="test_meaningful.pdf"):
    """Write a single-page PDF of sample sentences and return its path.

    The page holds a fixed set of English sentences that mention people,
    organizations, locations and a date; per the text itself, they are
    meant to be picked up as entities by a downstream OCR/extraction step.

    Args:
        filename: Path of the PDF to write. Defaults to
            ``"test_meaningful.pdf"`` (relative to the current working
            directory), so existing zero-argument callers are unaffected.

    Returns:
        The ``filename`` that was written.

    Side effects:
        Creates or overwrites ``filename`` and prints the path and the
        resulting file size in bytes to stdout.
    """
    # Page layout: x of the left edge of the text, y of the first
    # baseline, and the vertical distance between successive lines.
    left_margin = 100
    first_line_y = 700
    line_spacing = 20

    # Sample text with entities.
    text_lines = [
        "Test Document for OCR Processing",
        "This document contains various entities for testing.",
        "John Smith works at Microsoft Corporation in Seattle.",
        "The company was founded by Bill Gates and Paul Allen.",
        "Microsoft develops software products like Windows and Office.",
        "The headquarters is located in Redmond, Washington.",
        "This document was created on October 28, 2025.",
        "It contains names of people, organizations, and locations.",
        "These should be extracted as entities by the system.",
    ]

    pdf = canvas.Canvas(filename, pagesize=letter)

    # ReportLab's default origin is the bottom-left corner of the page,
    # so each successive line is drawn at a smaller y to move downwards.
    for index, line in enumerate(text_lines):
        pdf.drawString(left_margin, first_line_y - index * line_spacing, line)

    # save() finalizes the current page and writes the file to disk.
    pdf.save()

    print(f"Created test PDF: {filename}")
    print(f"File size: {os.path.getsize(filename)} bytes")
    return filename
# Script entry point: generate the sample PDF only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_test_pdf()