# Source: railseek6/LightRAG-main/optimize_performance_monitoring.py
# (file-browser header — "Files", "218 lines", "7.7 KiB", "Python" — was
# pasted into the module verbatim; kept as a comment so the file parses.)
"""
Optimization 3: Performance monitoring for merging stage
This optimization adds detailed performance monitoring and resource tracking
to identify bottlenecks and optimize resource usage.
"""
import time
import asyncio
import psutil
import logging
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class PerformanceMetrics:
    """Performance metrics for merging stage phase 2.

    One record per document merge: timing, process resource samples, graph
    size before/after, processing counts, and batch-progress bookkeeping.
    """

    doc_id: str  # identifier of the document being merged
    start_time: float  # epoch seconds when monitoring began (time.time())
    end_time: Optional[float] = None  # epoch seconds at completion; None while in flight
    duration: Optional[float] = None  # end_time - start_time; None while in flight
    memory_usage_mb: float = 0  # process RSS in MiB (sampled at start, overwritten at end)
    cpu_percent: float = 0  # process CPU percent (sampled at start, overwritten at end)
    entities_processed: int = 0
    relationships_processed: int = 0
    graph_nodes_before: int = 0  # graph node count before the merge
    graph_nodes_after: int = 0  # graph node count after the merge
    graph_edges_before: int = 0
    graph_edges_after: int = 0
    success: bool = False  # set True by record_success, left/forced False on failure
    error_message: Optional[str] = None  # populated only on failure
    current_file_number: int = 1  # 1-based position within a multi-file batch
    total_files: int = 1  # total files in the batch
    file_path: str = ""  # source file path, if known
class PerformanceMonitor:
    """Monitor and optimize performance for merging stage operations.

    Collects one PerformanceMetrics record per merging-stage run, keeps the
    completed records in an in-memory history, and aggregates summary
    statistics across runs.  Monitoring can be toggled at runtime; while
    disabled, start_merging_stage_phase2 returns None and the record_*
    methods are no-ops.
    """

    def __init__(self):
        self.enabled = True  # global on/off switch for all monitoring calls
        # Completed runs, in completion order.  Unbounded: long-lived
        # processes should call clear_history() periodically.
        self.metrics_history: List[PerformanceMetrics] = []
        # Handle to the current OS process, used for RSS / CPU sampling.
        self.process = psutil.Process()

    async def start_merging_stage_phase2(
        self,
        doc_id: str,
        current_file_number: int = 1,
        total_files: int = 1,
        file_path: str = ""
    ) -> Optional[PerformanceMetrics]:
        """Start performance monitoring for merging stage phase 2.

        Args:
            doc_id: Identifier of the document being merged.
            current_file_number: 1-based position of this file in its batch.
            total_files: Total number of files in the batch.
            file_path: Source file path, if known.

        Returns:
            A new PerformanceMetrics seeded with initial resource readings,
            or None when monitoring is disabled.  (The original annotation
            claimed a non-optional return; callers must handle None.)
        """
        if not self.enabled:
            return None
        metrics = PerformanceMetrics(
            doc_id=doc_id,
            start_time=time.time(),
            current_file_number=current_file_number,
            total_files=total_files,
            file_path=file_path
        )
        # Record initial resource usage.  NOTE: psutil returns 0.0 from the
        # first cpu_percent() call on a Process (it needs an earlier sample
        # to diff against), so the initial CPU reading is a baseline only.
        metrics.memory_usage_mb = self.process.memory_info().rss / 1024 / 1024
        metrics.cpu_percent = self.process.cpu_percent()
        logger.info(f"Performance monitoring started for doc {doc_id}")
        return metrics

    async def record_success(self, metrics: Optional[PerformanceMetrics]) -> None:
        """Record successful completion of merging stage.

        Finalizes timing, re-samples resource usage, appends the record to
        the history, and logs a one-line summary.  No-op when monitoring is
        disabled or ``metrics`` is None (monitoring was off at start).
        """
        if not self.enabled or metrics is None:
            return
        metrics.end_time = time.time()
        metrics.duration = metrics.end_time - metrics.start_time
        metrics.success = True
        # Record final resource usage (overwrites the initial samples).
        metrics.memory_usage_mb = self.process.memory_info().rss / 1024 / 1024
        metrics.cpu_percent = self.process.cpu_percent()
        self.metrics_history.append(metrics)
        logger.info(
            f"Performance metrics for doc {metrics.doc_id}: "
            f"Duration: {metrics.duration:.2f}s, "
            f"Memory: {metrics.memory_usage_mb:.1f}MB, "
            f"CPU: {metrics.cpu_percent:.1f}%, "
            f"Entities: {metrics.entities_processed}, "
            f"Relationships: {metrics.relationships_processed}"
        )

    async def record_failure(self, metrics: Optional[PerformanceMetrics], error_message: str) -> None:
        """Record failed merging stage.

        Finalizes timing, stores the error message, and appends the record
        to the history.  Resource usage is not re-sampled on the failure
        path, so the record keeps its start-time samples.  No-op when
        monitoring is disabled or ``metrics`` is None.
        """
        if not self.enabled or metrics is None:
            return
        metrics.end_time = time.time()
        metrics.duration = metrics.end_time - metrics.start_time
        metrics.success = False
        metrics.error_message = error_message
        self.metrics_history.append(metrics)
        logger.error(
            f"Performance failure for doc {metrics.doc_id}: "
            f"Duration: {metrics.duration:.2f}s, "
            f"Error: {error_message}"
        )

    def update_graph_stats(
        self,
        metrics: Optional[PerformanceMetrics],
        nodes_before: int,
        edges_before: int,
        nodes_after: int,
        edges_after: int
    ) -> None:
        """Update graph node/edge counts (before and after the merge).

        No-op when ``metrics`` is None.  Note the parameter order is
        (nodes_before, edges_before, nodes_after, edges_after).
        """
        if metrics is None:
            return
        metrics.graph_nodes_before = nodes_before
        metrics.graph_nodes_after = nodes_after
        metrics.graph_edges_before = edges_before
        metrics.graph_edges_after = edges_after

    def update_entity_relationship_counts(
        self,
        metrics: Optional[PerformanceMetrics],
        entities_processed: int,
        relationships_processed: int
    ) -> None:
        """Update entity and relationship counts on an in-flight record.

        No-op when ``metrics`` is None.
        """
        if metrics is None:
            return
        metrics.entities_processed = entities_processed
        metrics.relationships_processed = relationships_processed

    def get_performance_summary(self) -> Dict[str, Any]:
        """Get performance summary across all recorded metrics.

        Returns:
            An empty dict when no runs have been recorded; otherwise run
            counts, success rate, per-run averages computed over successful
            runs only, and a snapshot of the most recent record.
        """
        if not self.metrics_history:
            return {}
        successful_runs = [m for m in self.metrics_history if m.success]
        failed_runs = [m for m in self.metrics_history if not m.success]
        if successful_runs:
            n = len(successful_runs)
            avg_duration = sum(m.duration for m in successful_runs) / n
            avg_memory = sum(m.memory_usage_mb for m in successful_runs) / n
            avg_cpu = sum(m.cpu_percent for m in successful_runs) / n
            avg_entities = sum(m.entities_processed for m in successful_runs) / n
            avg_relationships = sum(m.relationships_processed for m in successful_runs) / n
        else:
            avg_duration = avg_memory = avg_cpu = avg_entities = avg_relationships = 0
        return {
            "total_runs": len(self.metrics_history),
            "successful_runs": len(successful_runs),
            "failed_runs": len(failed_runs),
            # History is non-empty here (guarded above), so the division is safe.
            "success_rate": len(successful_runs) / len(self.metrics_history),
            "average_duration_seconds": avg_duration,
            "average_memory_mb": avg_memory,
            "average_cpu_percent": avg_cpu,
            "average_entities_processed": avg_entities,
            "average_relationships_processed": avg_relationships,
            # Shallow snapshot is fine: the dataclass holds only scalars.
            "last_run": self.metrics_history[-1].__dict__,
        }

    def enable_monitoring(self) -> None:
        """Enable performance monitoring."""
        self.enabled = True
        logger.info("Performance monitoring enabled")

    def disable_monitoring(self) -> None:
        """Disable performance monitoring."""
        self.enabled = False
        logger.info("Performance monitoring disabled")

    def clear_history(self) -> None:
        """Clear performance metrics history."""
        self.metrics_history.clear()
        logger.info("Performance metrics history cleared")
# Global performance monitor instance.
# Module-level singleton shared by the convenience wrappers below so that
# callers across the codebase accumulate metrics into a single history.
performance_monitor = PerformanceMonitor()
async def monitor_merging_stage_phase2(
    doc_id: str,
    current_file_number: int = 1,
    total_files: int = 1,
    file_path: str = ""
) -> Optional[PerformanceMetrics]:
    """Convenience function to start monitoring merging stage phase 2.

    Delegates to the global monitor's start_merging_stage_phase2.

    Returns:
        A fresh PerformanceMetrics record, or None when monitoring is
        disabled on the global monitor (hence the Optional annotation,
        which the original signature mis-stated as non-optional).
    """
    return await performance_monitor.start_merging_stage_phase2(
        doc_id, current_file_number, total_files, file_path
    )
async def record_merging_success(metrics: PerformanceMetrics) -> None:
    """Module-level shortcut for recording a successful merge.

    Forwards ``metrics`` to record_success on the shared global monitor.
    """
    monitor = performance_monitor
    await monitor.record_success(metrics)
async def record_merging_failure(metrics: PerformanceMetrics, error_message: str) -> None:
    """Module-level shortcut for recording a failed merge.

    Forwards ``metrics`` and ``error_message`` to record_failure on the
    shared global monitor.
    """
    monitor = performance_monitor
    await monitor.record_failure(metrics, error_message)
def get_performance_report() -> Dict[str, Any]:
    """Return the aggregated performance summary of the shared monitor.

    Empty dict when no runs have been recorded yet.
    """
    summary = performance_monitor.get_performance_summary()
    return summary