"""
Performance benchmark script to compare LightRAG performance before and after optimizations.
Tests indexing speed and search performance with various document sizes and query loads.
"""
import asyncio
import time
import logging
import sys
import os
import statistics
from pathlib import Path
from typing import Dict, List, Tuple

# Add the directory containing this script to sys.path so the imports below can be resolved from it
sys.path.insert(0, str(Path(__file__).parent))

from debug_llm_function import create_mock_llm_and_embedding
from lightrag.lightrag import LightRAG
from lightrag.utils import EmbeddingFunc

# Set up root logging for the script (INFO and above; each record shows
# timestamp, logger name and level) and create this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class PerformanceBenchmark:
    """Time LightRAG indexing and querying over three fixed corpus sizes.

    Each scenario inserts a built-in list of short documents into a fresh
    LightRAG instance, runs a fixed list of queries against it, records the
    timings, and finally removes the scenario's storage directory.

    Attributes are documented inline in ``__init__``.
    """

    # Scenario name -> (key into ``test_document_sets``, number of queries to
    # run). ``None`` as the query count means "every entry of ``test_queries``".
    _SCENARIOS = {
        'small_docs': ('small', 3),
        'medium_docs': ('medium', 6),
        'large_docs': ('large', None),
    }

    def __init__(self):
        # Results of the most recent comprehensive run, keyed by scenario name.
        self.benchmark_results = {}

        # One ordered corpus; the smaller document sets are prefixes of it, so
        # every scenario indexes the same leading documents.
        corpus = [
            "Artificial Intelligence (AI) is transforming industries through machine learning and deep learning applications.",
            "Machine learning algorithms enable computers to learn patterns from data without explicit programming.",
            "Deep learning uses neural networks with multiple layers to model complex patterns in data.",
            "Natural Language Processing (NLP) allows computers to understand and generate human language.",
            "Computer vision enables machines to interpret and understand visual information from the world.",
            "AI research focuses on developing intelligent systems that can reason, learn, and adapt.",
            "Neural networks are the foundation of modern deep learning and AI systems.",
            "Reinforcement learning trains agents through trial and error interactions with environments.",
            "Transfer learning allows models to apply knowledge from one domain to another.",
            "Generative AI creates new content like text, images, and code based on learned patterns.",
            "Computer vision applications include image recognition, object detection, and scene understanding.",
            "Natural language understanding enables machines to comprehend and respond to human language.",
            "Machine learning models require large datasets for training and validation.",
            "Deep learning architectures include convolutional networks, recurrent networks, and transformers.",
            "AI ethics focuses on ensuring responsible and fair use of artificial intelligence technologies.",
            "Explainable AI aims to make machine learning models more transparent and interpretable.",
            "Edge AI enables AI processing on local devices without cloud connectivity.",
            "Federated learning allows model training across decentralized devices while preserving privacy.",
            "AutoML automates the process of applying machine learning to real-world problems.",
            "Quantum computing has potential applications in accelerating machine learning algorithms.",
        ]
        # Document sets keyed by size: 3, 8 and 20 documents respectively.
        self.test_document_sets = {
            'small': corpus[:3],
            'medium': corpus[:8],
            'large': list(corpus),
        }

        # Queries issued after indexing; scenarios use a prefix of this list.
        self.test_queries = [
            "artificial intelligence machine learning",
            "deep learning neural networks",
            "natural language processing computer vision",
            "AI research and development",
            "neural networks transfer learning",
            "machine learning algorithms",
            "computer vision applications",
            "deep learning architectures",
            "AI ethics and responsible use",
            "federated learning and privacy",
        ]

    async def setup_lightrag(self, storage_dir: str):
        """Create and initialize a LightRAG instance rooted at ``storage_dir``.

        Args:
            storage_dir: Directory passed to LightRAG as ``working_dir``.

        Returns:
            The LightRAG instance after ``initialize_storages()`` and
            ``initialize_pipeline_status()`` have both been awaited.
        """
        llm_func, embedding_func = create_mock_llm_and_embedding()

        rag = LightRAG(
            llm_model_func=llm_func,
            embedding_func=embedding_func,
            working_dir=storage_dir
        )

        # Initialize storages and pipeline status
        await rag.initialize_storages()
        from lightrag.kg.shared_storage import initialize_pipeline_status
        await initialize_pipeline_status()

        return rag

    async def benchmark_indexing(self, rag, documents: List[str], test_name: str) -> Dict:
        """Time a single ``rag.ainsert(documents)`` call.

        Args:
            rag: Object exposing awaitable ``ainsert`` and
                ``get_processing_status`` methods.
            documents: Texts to insert in one call.
            test_name: Label copied into the result and the log line.

        Returns:
            Dict with ``test_name``, ``documents_count``, ``indexing_time``
            (seconds spent awaiting ``ainsert`` only), ``documents_per_second``
            (0 when no time elapsed), ``processing_status`` and ``track_id``
            (whatever ``ainsert`` returned).
        """
        logger.info(f"Benchmarking indexing for {test_name} ({len(documents)} documents)")

        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() differences can.
        started = time.perf_counter()
        track_id = await rag.ainsert(documents)
        indexing_time = time.perf_counter() - started

        # Fixed pause before reading the status; it is deliberately excluded
        # from indexing_time.
        # NOTE(review): presumably gives background processing time to settle
        # - confirm whether ainsert already waits for completion.
        await asyncio.sleep(2)

        # Get processing status
        processing_status = await rag.get_processing_status()

        return {
            'test_name': test_name,
            'documents_count': len(documents),
            'indexing_time': indexing_time,
            'documents_per_second': len(documents) / indexing_time if indexing_time > 0 else 0,
            'processing_status': processing_status,
            'track_id': track_id
        }

    async def benchmark_queries(self, rag, queries: List[str], test_name: str) -> Dict:
        """Run ``queries`` sequentially through ``rag.aquery`` and summarize timings.

        Args:
            rag: Object exposing an awaitable ``aquery(query)`` method.
            queries: Query strings, executed one after another.
            test_name: Label copied into the result and the log line.

        Returns:
            Dict with ``test_name``, ``queries_count``, average/median/min/max
            query time, ``query_time_stddev`` (0 with fewer than two samples),
            ``average_response_length`` and ``total_query_time``. When
            ``queries`` is empty every numeric statistic is 0.
        """
        logger.info(f"Benchmarking queries for {test_name} ({len(queries)} queries)")

        query_times = []
        response_lengths = []

        for query in queries:
            started = time.perf_counter()
            response = await rag.aquery(query)
            query_times.append(time.perf_counter() - started)
            # A falsy response (None, empty) counts as length 0.
            response_lengths.append(len(response) if response else 0)

        if not query_times:
            # statistics.mean/median and min/max all raise on empty input, so
            # report an all-zero summary instead of crashing.
            return {
                'test_name': test_name,
                'queries_count': 0,
                'average_query_time': 0.0,
                'median_query_time': 0.0,
                'min_query_time': 0.0,
                'max_query_time': 0.0,
                'query_time_stddev': 0,
                'average_response_length': 0,
                'total_query_time': 0.0
            }

        return {
            'test_name': test_name,
            'queries_count': len(queries),
            'average_query_time': statistics.mean(query_times),
            'median_query_time': statistics.median(query_times),
            'min_query_time': min(query_times),
            'max_query_time': max(query_times),
            # stdev needs at least two data points.
            'query_time_stddev': statistics.stdev(query_times) if len(query_times) > 1 else 0,
            'average_response_length': statistics.mean(response_lengths),
            'total_query_time': sum(query_times)
        }

    async def run_single_benchmark(self, storage_dir: str, test_name: str) -> Dict:
        """Run indexing and query benchmarks for one scenario.

        Args:
            storage_dir: Working directory for the LightRAG instance; it is
                removed again before this method returns or raises.
            test_name: One of ``'small_docs'``, ``'medium_docs'``,
                ``'large_docs'``.

        Returns:
            Dict with ``test_name``, the ``indexing`` and ``query`` result
            dicts, and ``total_time`` (indexing time plus total query time).

        Raises:
            ValueError: If ``test_name`` is not a known scenario. This is
                checked before any storage is created.
        """
        logger.info(f"=== RUNNING BENCHMARK: {test_name} ===")

        # Validate first so a typo does not leave a half-initialized storage
        # directory behind.
        try:
            document_key, query_limit = self._SCENARIOS[test_name]
        except KeyError:
            raise ValueError(f"Unknown test name: {test_name}") from None

        documents = self.test_document_sets[document_key]
        queries = self.test_queries[:query_limit]

        try:
            # Setup LightRAG
            rag = await self.setup_lightrag(storage_dir)

            # Run indexing benchmark
            indexing_results = await self.benchmark_indexing(rag, documents, test_name)

            # Run query benchmark
            query_results = await self.benchmark_queries(rag, queries, test_name)
        finally:
            # Remove the storage directory even when a step above failed.
            await self.cleanup_storage(storage_dir)

        return {
            'test_name': test_name,
            'indexing': indexing_results,
            'query': query_results,
            'total_time': indexing_results['indexing_time'] + query_results['total_query_time']
        }

    async def run_comprehensive_benchmark(self):
        """Run every scenario in order, store and log the results.

        Returns:
            Dict mapping scenario name to the dict returned by
            ``run_single_benchmark``; the same dict is stored in
            ``self.benchmark_results``.
        """
        logger.info("=== STARTING COMPREHENSIVE PERFORMANCE BENCHMARK ===")

        all_results = {}

        for scenario in self._SCENARIOS:
            # Each scenario gets its own working directory.
            storage_dir = f"./benchmark_{scenario}_storage"
            all_results[scenario] = await self.run_single_benchmark(storage_dir, scenario)

        self.benchmark_results = all_results
        self.print_benchmark_results()

        return all_results

    def print_benchmark_results(self):
        """Log per-scenario results, a metric summary and a scalability verdict.

        Reads ``self.benchmark_results``. The scalability ratio compares
        ``documents_per_second`` of ``large_docs`` against ``small_docs`` and
        is skipped (with a log line) when either scenario is absent.
        """
        logger.info("=== COMPREHENSIVE PERFORMANCE BENCHMARK RESULTS ===")

        for scenario, results in self.benchmark_results.items():
            indexing = results['indexing']
            query = results['query']

            logger.info(f"\n--- {scenario.upper()} ---")
            logger.info(f"Documents: {indexing['documents_count']}")
            logger.info(f"Indexing Time: {indexing['indexing_time']:.3f}s")
            logger.info(f"Documents/Second: {indexing['documents_per_second']:.2f}")
            logger.info(f"Queries: {query['queries_count']}")
            logger.info(f"Average Query Time: {query['average_query_time']:.3f}s")
            logger.info(f"Total Time: {results['total_time']:.3f}s")

        # Print summary comparison
        logger.info("\n=== PERFORMANCE SUMMARY ===")

        # (section of the result dict, metric key inside that section)
        summary_metrics = (
            ('indexing', 'indexing_time'),
            ('indexing', 'documents_per_second'),
            ('query', 'average_query_time'),
        )
        for section, metric in summary_metrics:
            logger.info(f"\n{metric.replace('_', ' ').title()}:")
            for scenario, results in self.benchmark_results.items():
                logger.info(f"  {scenario}: {results[section][metric]:.3f}")

        # Calculate scalability metrics
        small_results = self.benchmark_results.get('small_docs')
        large_results = self.benchmark_results.get('large_docs')
        if small_results is None or large_results is None:
            # A partial or empty result set has nothing to compare.
            logger.info("\nScalability Ratio skipped: small_docs and large_docs results are both required")
            return

        small_perf = small_results['indexing']['documents_per_second']
        large_perf = large_results['indexing']['documents_per_second']
        # A zero small-scenario throughput would divide by zero; report 0.
        scalability_ratio = large_perf / small_perf if small_perf > 0 else 0

        logger.info(f"\nScalability Ratio (large/small): {scalability_ratio:.2f}")

        if scalability_ratio > 0.8:
            logger.info("✅ Good scalability - performance maintains with larger datasets")
        elif scalability_ratio > 0.5:
            logger.info("⚠️  Moderate scalability - some performance degradation with larger datasets")
        else:
            logger.info("❌ Poor scalability - significant performance degradation with larger datasets")

    async def cleanup_storage(self, storage_dir: str):
        """Delete ``storage_dir`` recursively if it exists (best effort).

        Filesystem errors are logged as warnings and never raised, so a failed
        cleanup does not abort the benchmark run.

        Args:
            storage_dir: Directory to remove.
        """
        import shutil

        try:
            if os.path.exists(storage_dir):
                shutil.rmtree(storage_dir)
                logger.info(f"Cleaned up storage: {storage_dir}")
        except OSError as e:
            logger.warning(f"Could not clean up storage {storage_dir}: {e}")

async def main() -> int:
    """Run the comprehensive benchmark and return the process exit code.

    Returns:
        ``0`` once the benchmark has finished; any exception raised by the
        benchmark propagates to the caller.
    """
    benchmark = PerformanceBenchmark()
    # run_comprehensive_benchmark logs its own report, so the returned
    # results dict is not needed here.
    await benchmark.run_comprehensive_benchmark()

    logger.info("🎯 PERFORMANCE BENCHMARK COMPLETED")
    return 0

if __name__ == "__main__":
    # Drive the async entry point to completion and hand its return value to
    # the interpreter as the process exit status.
    sys.exit(asyncio.run(main()))