import os
import sys

import PyPDF2


def check_pdf_content(pdf_path: str) -> None:
    """Print a per-page report of the text that can be extracted from a PDF.

    For every page the report shows the length of the extracted text, a
    ``repr`` preview of its first 100 characters, and whether the page holds
    any non-whitespace text. Nothing is returned; all results, including
    error conditions, are written to stdout.

    Args:
        pdf_path: Path of the PDF file to inspect.
    """
    if not os.path.exists(pdf_path):
        print(f"❌ {pdf_path} not found")
        return

    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            num_pages = len(reader.pages)
            print(f"📄 {pdf_path} has {num_pages} pages")

            for page_number, page in enumerate(reader.pages, start=1):
                print(f"Page {page_number}:")
                try:
                    # Normalise a falsy result to "" so len() and slicing
                    # below are always applied to a string.
                    text = page.extract_text() or ""
                except Exception as e:
                    # One unreadable page should not hide the report for the
                    # pages that follow it.
                    print(f" ❌ Error extracting text: {e}")
                    continue

                print(f" Text length: {len(text)}")
                print(f" Text preview: {repr(text[:100])}")
                if text.strip():
                    print(" ✅ Contains text")
                else:
                    print(" ❌ No text or only whitespace")
    except Exception as e:
        # Top-level boundary of this diagnostic: report open/parse failures
        # (unreadable file, malformed PDF, ...) instead of raising.
        print(f"❌ Error reading PDF: {e}")


if __name__ == "__main__":
    # Inspect the path given on the command line; without one, fall back to
    # the original default file name.
    check_pdf_content(sys.argv[1] if len(sys.argv) > 1 else "ocr.pdf")