""" Debug script to trace the server-side processing issue """ import sys import os sys.path.append('LightRAG-main') import asyncio from lightrag.document_processor import get_document_processor from lightrag.api.routers.document_routes import pipeline_enqueue_file from lightrag import LightRAG from pathlib import Path async def debug_server_processing(): print("šŸ” Debugging server-side processing pipeline...") # Test 1: Direct document processor (already working) print("\nšŸ“„ Test 1: Direct Document Processor") processor = get_document_processor() direct_result = await processor.process_document('ocr.pdf') print(f" Direct processing - Success: {direct_result.success}") print(f" Direct processing - Content length: {len(direct_result.content)}") print(f" Direct processing - Content stripped: {len(direct_result.content.strip())}") # Test 2: Simulate server pipeline print("\nšŸ”„ Test 2: Simulating Server Pipeline") # Create a temporary RAG instance for testing from lightrag.utils import generate_track_id # Create a minimal RAG instance with correct parameters rag = LightRAG( workspace="test_workspace", enable_llm_cache_for_entity_extract=True ) # Save the file to simulate upload temp_file = Path("temp_ocr.pdf") if not temp_file.exists(): import shutil shutil.copy2('ocr.pdf', temp_file) try: # Test the pipeline_enqueue_file function directly print(" Testing pipeline_enqueue_file...") success, track_id = await pipeline_enqueue_file(rag, temp_file, generate_track_id("debug")) print(f" Pipeline result - Success: {success}") print(f" Pipeline result - Track ID: {track_id}") if not success: print(" āŒ Pipeline failed - checking document status...") # Check what documents are in the system docs_by_status = await rag.get_docs_by_status("FAILED") for doc_id, doc_status in docs_by_status.items(): print(f" Failed doc: {doc_id} - {doc_status.error_msg}") except Exception as e: print(f" āŒ Pipeline error: {e}") import traceback traceback.print_exc() finally: # Cleanup if temp_file.exists(): temp_file.unlink() # Clean up RAG storage await rag.text_chunks.drop() await rag.full_docs.drop() await rag.doc_status.drop() if __name__ == "__main__": asyncio.run(debug_server_processing())