"""
Performance benchmark script to compare LightRAG performance before and after optimizations.

Tests indexing speed and search performance with various document sizes and query loads.
"""
import asyncio
import logging
import os
import shutil
import statistics
import sys
import time
from pathlib import Path
from typing import Dict, List, Tuple

# Add the parent directory to the path so we can import lightrag
# (must run before the lightrag / debug_llm_function imports below).
sys.path.insert(0, str(Path(__file__).parent))

from debug_llm_function import create_mock_llm_and_embedding
from lightrag.lightrag import LightRAG
from lightrag.utils import EmbeddingFunc
# Module-wide logging: timestamped, INFO-level records for benchmark progress.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Per-module logger so records carry this module's name.
logger = logging.getLogger(__name__)
class PerformanceBenchmark:
    """Benchmark harness for LightRAG: measures indexing throughput and query
    latency across small / medium / large document corpora.

    Results are accumulated in ``self.benchmark_results`` (scenario name ->
    combined result dict) by :meth:`run_comprehensive_benchmark`.
    """

    # Scenario name -> (document-set key, number of queries to run).
    # A query count of None means "use every query in self.test_queries".
    _SCENARIOS = {
        'small_docs': ('small', 3),
        'medium_docs': ('medium', 6),
        'large_docs': ('large', None),
    }

    def __init__(self):
        # Scenario name -> combined results; filled by run_comprehensive_benchmark().
        self.benchmark_results = {}
        # Fixed corpora of increasing size used to exercise indexing.
        self.test_document_sets = {
            'small': [
                "Artificial Intelligence (AI) is transforming industries through machine learning and deep learning applications.",
                "Machine learning algorithms enable computers to learn patterns from data without explicit programming.",
                "Deep learning uses neural networks with multiple layers to model complex patterns in data."
            ],
            'medium': [
                "Artificial Intelligence (AI) is transforming industries through machine learning and deep learning applications.",
                "Machine learning algorithms enable computers to learn patterns from data without explicit programming.",
                "Deep learning uses neural networks with multiple layers to model complex patterns in data.",
                "Natural Language Processing (NLP) allows computers to understand and generate human language.",
                "Computer vision enables machines to interpret and understand visual information from the world.",
                "AI research focuses on developing intelligent systems that can reason, learn, and adapt.",
                "Neural networks are the foundation of modern deep learning and AI systems.",
                "Reinforcement learning trains agents through trial and error interactions with environments."
            ],
            'large': [
                "Artificial Intelligence (AI) is transforming industries through machine learning and deep learning applications.",
                "Machine learning algorithms enable computers to learn patterns from data without explicit programming.",
                "Deep learning uses neural networks with multiple layers to model complex patterns in data.",
                "Natural Language Processing (NLP) allows computers to understand and generate human language.",
                "Computer vision enables machines to interpret and understand visual information from the world.",
                "AI research focuses on developing intelligent systems that can reason, learn, and adapt.",
                "Neural networks are the foundation of modern deep learning and AI systems.",
                "Reinforcement learning trains agents through trial and error interactions with environments.",
                "Transfer learning allows models to apply knowledge from one domain to another.",
                "Generative AI creates new content like text, images, and code based on learned patterns.",
                "Computer vision applications include image recognition, object detection, and scene understanding.",
                "Natural language understanding enables machines to comprehend and respond to human language.",
                "Machine learning models require large datasets for training and validation.",
                "Deep learning architectures include convolutional networks, recurrent networks, and transformers.",
                "AI ethics focuses on ensuring responsible and fair use of artificial intelligence technologies.",
                "Explainable AI aims to make machine learning models more transparent and interpretable.",
                "Edge AI enables AI processing on local devices without cloud connectivity.",
                "Federated learning allows model training across decentralized devices while preserving privacy.",
                "AutoML automates the process of applying machine learning to real-world problems.",
                "Quantum computing has potential applications in accelerating machine learning algorithms."
            ]
        }

        # Query workload; scenarios take a prefix of this list (see _SCENARIOS).
        self.test_queries = [
            "artificial intelligence machine learning",
            "deep learning neural networks",
            "natural language processing computer vision",
            "AI research and development",
            "neural networks transfer learning",
            "machine learning algorithms",
            "computer vision applications",
            "deep learning architectures",
            "AI ethics and responsible use",
            "federated learning and privacy"
        ]

    async def setup_lightrag(self, storage_dir: str):
        """Initialize LightRAG instance.

        Builds a LightRAG backed by mock LLM/embedding functions, initializes
        its storages and the shared pipeline status, and returns it ready for
        use. ``storage_dir`` becomes the working directory on disk.
        """
        llm_func, embedding_func = create_mock_llm_and_embedding()

        rag = LightRAG(
            llm_model_func=llm_func,
            embedding_func=embedding_func,
            working_dir=storage_dir
        )

        # Initialize storages and pipeline status
        await rag.initialize_storages()
        from lightrag.kg.shared_storage import initialize_pipeline_status
        await initialize_pipeline_status()

        return rag

    async def benchmark_indexing(self, rag, documents: List[str], test_name: str,
                                 *, settle_seconds: float = 2.0) -> Dict:
        """Benchmark document indexing performance.

        Args:
            rag: Initialized LightRAG instance.
            documents: Texts to insert.
            test_name: Label recorded in the result dict.
            settle_seconds: How long to wait after ``ainsert`` returns for
                background processing to settle before sampling status
                (previously hard-coded to 2 seconds).

        Returns:
            Dict with timing figures, the pipeline processing status, and the
            insertion track id.
        """
        logger.info(f"Benchmarking indexing for {test_name} ({len(documents)} documents)")

        start_time = time.time()
        track_id = await rag.ainsert(documents)
        indexing_time = time.time() - start_time

        # Wait for background processing to complete before sampling status.
        await asyncio.sleep(settle_seconds)

        # Get processing status
        processing_status = await rag.get_processing_status()

        return {
            'test_name': test_name,
            'documents_count': len(documents),
            'indexing_time': indexing_time,
            # Guard against a zero elapsed time (e.g. extremely fast mocks).
            'documents_per_second': len(documents) / indexing_time if indexing_time > 0 else 0,
            'processing_status': processing_status,
            'track_id': track_id,
        }

    async def benchmark_queries(self, rag, queries: List[str], test_name: str) -> Dict:
        """Benchmark query performance.

        Runs each query sequentially through ``rag.aquery`` and returns
        aggregate latency statistics plus average response length.
        """
        logger.info(f"Benchmarking queries for {test_name} ({len(queries)} queries)")

        # statistics.mean()/median() raise StatisticsError on empty samples,
        # so short-circuit an empty workload with an all-zero result.
        if not queries:
            return {
                'test_name': test_name,
                'queries_count': 0,
                'average_query_time': 0,
                'median_query_time': 0,
                'min_query_time': 0,
                'max_query_time': 0,
                'query_time_stddev': 0,
                'average_response_length': 0,
                'total_query_time': 0,
            }

        query_times = []
        response_lengths = []

        for query in queries:
            start_time = time.time()
            response = await rag.aquery(query)
            query_time = time.time() - start_time

            query_times.append(query_time)
            response_lengths.append(len(response) if response else 0)

        return {
            'test_name': test_name,
            'queries_count': len(queries),
            'average_query_time': statistics.mean(query_times),
            'median_query_time': statistics.median(query_times),
            'min_query_time': min(query_times),
            'max_query_time': max(query_times),
            # stdev needs at least two samples.
            'query_time_stddev': statistics.stdev(query_times) if len(query_times) > 1 else 0,
            'average_response_length': statistics.mean(response_lengths),
            'total_query_time': sum(query_times),
        }

    async def run_single_benchmark(self, storage_dir: str, test_name: str) -> Dict:
        """Run a single benchmark test.

        Validates ``test_name`` against the scenario table *before* creating
        any storage (so a bad name cannot leak a working directory), then runs
        the indexing and query benchmarks and always cleans up the storage
        directory, even when a benchmark step raises.

        Raises:
            ValueError: If ``test_name`` is not a known scenario.
        """
        logger.info(f"=== RUNNING BENCHMARK: {test_name} ===")

        # Resolve the scenario before touching disk.
        scenario = self._SCENARIOS.get(test_name)
        if scenario is None:
            raise ValueError(f"Unknown test name: {test_name}")
        doc_key, query_count = scenario
        documents = self.test_document_sets[doc_key]
        queries = self.test_queries if query_count is None else self.test_queries[:query_count]

        # Setup LightRAG
        rag = await self.setup_lightrag(storage_dir)

        try:
            # Run indexing benchmark
            indexing_results = await self.benchmark_indexing(rag, documents, test_name)

            # Run query benchmark
            query_results = await self.benchmark_queries(rag, queries, test_name)

            # Combine results
            results = {
                'test_name': test_name,
                'indexing': indexing_results,
                'query': query_results,
                'total_time': indexing_results['indexing_time'] + query_results['total_query_time'],
            }
        finally:
            # Cleanup even if a benchmark step failed.
            await self.cleanup_storage(storage_dir)

        return results

    async def run_comprehensive_benchmark(self):
        """Run comprehensive benchmark across all test scenarios.

        Stores the per-scenario results on ``self.benchmark_results``, prints
        a summary, and returns the results dict.
        """
        logger.info("=== STARTING COMPREHENSIVE PERFORMANCE BENCHMARK ===")

        all_results = {}
        for scenario in self._SCENARIOS:
            storage_dir = f"./benchmark_{scenario}_storage"
            all_results[scenario] = await self.run_single_benchmark(storage_dir, scenario)

        self.benchmark_results = all_results
        self.print_benchmark_results()

        return all_results

    def print_benchmark_results(self):
        """Print detailed benchmark results via the module logger."""
        logger.info("=== COMPREHENSIVE PERFORMANCE BENCHMARK RESULTS ===")

        for scenario, results in self.benchmark_results.items():
            indexing = results['indexing']
            query = results['query']

            logger.info(f"\n--- {scenario.upper()} ---")
            logger.info(f"Documents: {indexing['documents_count']}")
            logger.info(f"Indexing Time: {indexing['indexing_time']:.3f}s")
            logger.info(f"Documents/Second: {indexing['documents_per_second']:.2f}")
            logger.info(f"Queries: {query['queries_count']}")
            logger.info(f"Average Query Time: {query['average_query_time']:.3f}s")
            logger.info(f"Total Time: {results['total_time']:.3f}s")

        # Print summary comparison: metric name -> which result section holds it.
        logger.info("\n=== PERFORMANCE SUMMARY ===")
        metric_sections = {
            'indexing_time': 'indexing',
            'documents_per_second': 'indexing',
            'average_query_time': 'query',
        }
        for metric, section in metric_sections.items():
            logger.info(f"\n{metric.replace('_', ' ').title()}:")
            for scenario in self.benchmark_results:
                value = self.benchmark_results[scenario][section][metric]
                logger.info(f"  {scenario}: {value:.3f}")

        # Scalability metrics need both boundary scenarios; skip otherwise
        # instead of raising KeyError on a partial run.
        small = self.benchmark_results.get('small_docs')
        large = self.benchmark_results.get('large_docs')
        if not (small and large):
            return

        small_perf = small['indexing']['documents_per_second']
        large_perf = large['indexing']['documents_per_second']
        scalability_ratio = large_perf / small_perf if small_perf > 0 else 0

        logger.info(f"\nScalability Ratio (large/small): {scalability_ratio:.2f}")

        if scalability_ratio > 0.8:
            logger.info("✅ Good scalability - performance maintains with larger datasets")
        elif scalability_ratio > 0.5:
            logger.info("⚠️ Moderate scalability - some performance degradation with larger datasets")
        else:
            logger.info("❌ Poor scalability - significant performance degradation with larger datasets")

    async def cleanup_storage(self, storage_dir: str):
        """Clean up test storage directory.

        Best-effort: a leftover directory should not fail the benchmark, so
        any error is logged as a warning rather than raised.
        """
        try:
            if os.path.exists(storage_dir):
                shutil.rmtree(storage_dir)
                logger.info(f"Cleaned up storage: {storage_dir}")
        except Exception as e:
            logger.warning(f"Could not clean up storage {storage_dir}: {e}")
async def main():
    """Main benchmark execution.

    Runs every scenario via PerformanceBenchmark and returns a process exit
    code (0 on success; exceptions propagate to asyncio.run and exit nonzero).
    """
    benchmark = PerformanceBenchmark()
    # Results are also retained on benchmark.benchmark_results; the previous
    # unused local assignment is dropped.
    await benchmark.run_comprehensive_benchmark()

    logger.info("🎯 PERFORMANCE BENCHMARK COMPLETED")
    return 0


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)