Files
railseek6/debug_server_processing.py

74 lines
2.5 KiB
Python

"""
Debug script to trace the server-side processing issue
"""
import sys
import os
sys.path.append('LightRAG-main')
import asyncio
from lightrag.document_processor import get_document_processor
from lightrag.api.routers.document_routes import pipeline_enqueue_file
from lightrag import LightRAG
from pathlib import Path
async def debug_server_processing():
    """Trace one PDF through direct processing and the server enqueue pipeline.

    Test 1 calls ``get_document_processor().process_document('ocr.pdf')`` and
    prints the success flag plus the raw and stripped content lengths.

    Test 2 builds a ``LightRAG`` instance for the ``test_workspace`` workspace,
    copies ``ocr.pdf`` to ``temp_ocr.pdf`` (unless that file already exists)
    and hands the copy to ``pipeline_enqueue_file``. When the pipeline reports
    failure, every document returned by ``rag.get_docs_by_status("FAILED")``
    is printed together with its ``error_msg``.

    Everything is reported via ``print``; nothing is returned. An exception
    raised inside the pipeline section is printed with its traceback instead
    of being propagated. Cleanup always runs afterwards: the temporary copy is
    removed only if this run created it, and the ``text_chunks``,
    ``full_docs`` and ``doc_status`` storages are dropped one by one so a
    failure in one drop does not prevent the others.
    """
    # Function-scope imports, as in the original script.
    import shutil
    import traceback

    from lightrag.utils import generate_track_id

    print("🔍 Debugging server-side processing pipeline...")

    # Test 1: Direct document processor (already working)
    print("\n📄 Test 1: Direct Document Processor")
    processor = get_document_processor()
    direct_result = await processor.process_document('ocr.pdf')
    # Guard against a missing content value so the length report itself
    # cannot raise and stop the script before Test 2.
    content = direct_result.content or ""
    print(f" Direct processing - Success: {direct_result.success}")
    print(f" Direct processing - Content length: {len(content)}")
    print(f" Direct processing - Content stripped: {len(content.strip())}")

    # Test 2: Simulate server pipeline
    print("\n🔄 Test 2: Simulating Server Pipeline")

    # Create a minimal RAG instance with correct parameters
    # NOTE(review): this script never initialises the storages explicitly;
    # confirm whether this LightRAG version requires that before use.
    rag = LightRAG(
        workspace="test_workspace",
        enable_llm_cache_for_entity_extract=True,
    )

    # Save the file to simulate upload. Remember whether this run created the
    # copy so cleanup never deletes a file that was already there.
    temp_file = Path("temp_ocr.pdf")
    created_temp_file = not temp_file.exists()
    if created_temp_file:
        shutil.copy2('ocr.pdf', temp_file)

    try:
        # Test the pipeline_enqueue_file function directly
        print(" Testing pipeline_enqueue_file...")
        success, track_id = await pipeline_enqueue_file(
            rag, temp_file, generate_track_id("debug")
        )
        print(f" Pipeline result - Success: {success}")
        print(f" Pipeline result - Track ID: {track_id}")

        if not success:
            print(" ❌ Pipeline failed - checking document status...")
            # Check what documents are in the system
            # NOTE(review): presumably this expects a DocStatus enum member
            # rather than the plain string "FAILED" - confirm against the
            # LightRAG version in LightRAG-main.
            docs_by_status = await rag.get_docs_by_status("FAILED")
            for doc_id, doc_status in docs_by_status.items():
                print(f" Failed doc: {doc_id} - {doc_status.error_msg}")
    except Exception as e:
        # Top-level boundary of a debug script: report and keep going so the
        # cleanup below still runs.
        print(f" ❌ Pipeline error: {e}")
        traceback.print_exc()
    finally:
        # Cleanup: the file may no longer exist at this path after the
        # pipeline call, hence the exists() check.
        if created_temp_file and temp_file.exists():
            temp_file.unlink()

        # Clean up RAG storage. Each drop is isolated so one failing storage
        # neither skips the remaining drops nor hides the output above.
        for storage_name in ("text_chunks", "full_docs", "doc_status"):
            try:
                await getattr(rag, storage_name).drop()
            except Exception as cleanup_error:
                print(f" Cleanup of {storage_name} failed: {cleanup_error}")
if __name__ == "__main__":
    # Script entry point: run the async debug routine to completion on a
    # fresh event loop.
    asyncio.run(debug_server_processing())