ocr speed improved

This commit is contained in:
2026-01-13 19:10:24 +08:00
parent a5eb381384
commit e7256a10ea
7 changed files with 283 additions and 22 deletions

View File

@@ -244,6 +244,23 @@ def create_app(args):
task.add_done_callback(app.state.background_tasks.discard)
logger.info(f"Process {os.getpid()} auto scan task started at startup for workspace '{args.workspace}'.")
# Warm up the OCR processor in the background to avoid a cold-start delay on the first upload
async def warm_up_ocr_processor():
    """Initialise the document processor ahead of the first upload.

    Best-effort: any ``Exception`` raised while importing or building the
    processor is logged as a warning and swallowed, so a failing warmup
    never propagates out of this coroutine.
    """
    try:
        logger.info("Starting OCR processor warmup...")
        # Import inside function to avoid unnecessary dependency if OCR not used
        from lightrag.document_processor import get_document_processor
        # get_document_processor() is a plain synchronous call that
        # initializes OptimizedOCRProcessor (≈9 seconds). Invoking it
        # directly here would stall the event loop for that whole time,
        # so run it on the default thread-pool executor and await the result.
        # NOTE(review): assumes get_document_processor() is safe to call from
        # a worker thread while requests may also call it — confirm.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, get_document_processor)
        logger.info("OCR processor warmed up successfully")
    except Exception as e:
        logger.warning(f"OCR warmup failed (noncritical): {e}")
# Schedule warmup as a background task (nonblocking)
warm_up_task = asyncio.create_task(warm_up_ocr_processor())
app.state.background_tasks.add(warm_up_task)
warm_up_task.add_done_callback(app.state.background_tasks.discard)
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
yield