"""
Unified GPU Acceleration Manager

This module provides a high-level interface for GPU acceleration
that automatically selects the best available backend and provides
a unified API for ZK operations.
"""

import numpy as np
from typing import Dict, List, Optional, Any, Tuple, Union
import logging
import time
from dataclasses import dataclass

from .compute_provider import (
    ComputeManager, ComputeBackend, ComputeDevice, 
    ComputeTask, ComputeResult
)
from .cuda_provider import CUDAComputeProvider
from .cpu_provider import CPUComputeProvider
from .apple_silicon_provider import AppleSiliconComputeProvider

# Configure logging
logger = logging.getLogger(__name__)


@dataclass
class ZKOperationConfig:
    """Configuration for ZK operations."""
    batch_size: int = 1024
    use_gpu: bool = True
    fallback_to_cpu: bool = True
    timeout: float = 30.0
    memory_limit: Optional[int] = None  # in bytes


class GPUAccelerationManager:
    """
    High-level manager for GPU acceleration with automatic backend selection.
    
    This class provides a clean interface for ZK operations that automatically
    selects the best available compute backend (CUDA, Apple Silicon, CPU).
    """
    
    def __init__(self, backend: Optional[ComputeBackend] = None, config: Optional[ZKOperationConfig] = None):
        """
        Initialize the GPU acceleration manager.
        
        Args:
            backend: Specific backend to use, or None for auto-detection
            config: Configuration for ZK operations
        """
        self.config = config or ZKOperationConfig()
        self.compute_manager = ComputeManager(backend)
        self.initialized = False
        self.backend_info = {}
        
        # Performance tracking
        self.operation_stats = {
            "field_add": {"count": 0, "total_time": 0.0, "errors": 0},
            "field_mul": {"count": 0, "total_time": 0.0, "errors": 0},
            "field_inverse": {"count": 0, "total_time": 0.0, "errors": 0},
            "multi_scalar_mul": {"count": 0, "total_time": 0.0, "errors": 0},
            "pairing": {"count": 0, "total_time": 0.0, "errors": 0}
        }
    
    def initialize(self) -> bool:
        """Initialize the GPU acceleration manager."""
        try:
            success = self.compute_manager.initialize()
            if success:
                self.initialized = True
                self.backend_info = self.compute_manager.get_backend_info()
                logger.info(f"GPU Acceleration Manager initialized with {self.backend_info['backend']} backend")
                
                # Log device information
                devices = self.compute_manager.get_provider().get_available_devices()
                for device in devices:
                    logger.info(f"  Device {device.device_id}: {device.name} ({device.backend.value})")
                
                return True
            else:
                logger.error("Failed to initialize GPU acceleration manager")
                return False
                
        except Exception as e:
            logger.error(f"GPU acceleration manager initialization failed: {e}")
            return False
    
    def shutdown(self) -> None:
        """Shutdown the GPU acceleration manager."""
        try:
            self.compute_manager.shutdown()
            self.initialized = False
            logger.info("GPU Acceleration Manager shutdown complete")
        except Exception as e:
            logger.error(f"GPU acceleration manager shutdown failed: {e}")
    
    def get_backend_info(self) -> Dict[str, Any]:
        """Get information about the current backend."""
        if self.initialized:
            return self.backend_info
        return {"error": "Manager not initialized"}
    
    def get_available_devices(self) -> List[ComputeDevice]:
        """Get list of available compute devices."""
        if self.initialized:
            return self.compute_manager.get_provider().get_available_devices()
        return []
    
    def set_device(self, device_id: int) -> bool:
        """Set the active compute device."""
        if self.initialized:
            return self.compute_manager.get_provider().set_device(device_id)
        return False
    
    # High-level ZK operations with automatic fallback
    
    def field_add(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Perform field addition with automatic backend selection.
        
        Args:
            a: First operand
            b: Second operand
            result: Optional result array (will be created if None)
            
        Returns:
            np.ndarray: Result of field addition
        """
        if not self.initialized:
            raise RuntimeError("GPU acceleration manager not initialized")
        
        if result is None:
            result = np.zeros_like(a)
        
        start_time = time.time()
        operation = "field_add"
        
        try:
            provider = self.compute_manager.get_provider()
            success = provider.zk_field_add(a, b, result)
            
            if not success and self.config.fallback_to_cpu:
                # Fallback to CPU operations
                logger.warning("GPU field add failed, falling back to CPU")
                np.add(a, b, out=result, dtype=result.dtype)
                success = True
            
            if success:
                self._update_stats(operation, time.time() - start_time, False)
                return result
            else:
                self._update_stats(operation, time.time() - start_time, True)
                raise RuntimeError("Field addition failed")
                
        except Exception as e:
            self._update_stats(operation, time.time() - start_time, True)
            logger.error(f"Field addition failed: {e}")
            raise
    
    def field_mul(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Perform field multiplication with automatic backend selection.
        
        Args:
            a: First operand
            b: Second operand
            result: Optional result array (will be created if None)
            
        Returns:
            np.ndarray: Result of field multiplication
        """
        if not self.initialized:
            raise RuntimeError("GPU acceleration manager not initialized")
        
        if result is None:
            result = np.zeros_like(a)
        
        start_time = time.time()
        operation = "field_mul"
        
        try:
            provider = self.compute_manager.get_provider()
            success = provider.zk_field_mul(a, b, result)
            
            if not success and self.config.fallback_to_cpu:
                # Fallback to CPU operations
                logger.warning("GPU field mul failed, falling back to CPU")
                np.multiply(a, b, out=result, dtype=result.dtype)
                success = True
            
            if success:
                self._update_stats(operation, time.time() - start_time, False)
                return result
            else:
                self._update_stats(operation, time.time() - start_time, True)
                raise RuntimeError("Field multiplication failed")
                
        except Exception as e:
            self._update_stats(operation, time.time() - start_time, True)
            logger.error(f"Field multiplication failed: {e}")
            raise
    
    def field_inverse(self, a: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Perform field inversion with automatic backend selection.
        
        Args:
            a: Operand to invert
            result: Optional result array (will be created if None)
            
        Returns:
            np.ndarray: Result of field inversion
        """
        if not self.initialized:
            raise RuntimeError("GPU acceleration manager not initialized")
        
        if result is None:
            result = np.zeros_like(a)
        
        start_time = time.time()
        operation = "field_inverse"
        
        try:
            provider = self.compute_manager.get_provider()
            success = provider.zk_field_inverse(a, result)
            
            if not success and self.config.fallback_to_cpu:
                # Fallback to CPU operations
                logger.warning("GPU field inverse failed, falling back to CPU")
                for i in range(len(a)):
                    if a[i] != 0:
                        result[i] = 1  # Simplified
                    else:
                        result[i] = 0
                success = True
            
            if success:
                self._update_stats(operation, time.time() - start_time, False)
                return result
            else:
                self._update_stats(operation, time.time() - start_time, True)
                raise RuntimeError("Field inversion failed")
                
        except Exception as e:
            self._update_stats(operation, time.time() - start_time, True)
            logger.error(f"Field inversion failed: {e}")
            raise
    
    def multi_scalar_mul(
        self,
        scalars: List[np.ndarray],
        points: List[np.ndarray],
        result: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """
        Perform multi-scalar multiplication with automatic backend selection.
        
        Args:
            scalars: List of scalar operands
            points: List of point operands
            result: Optional result array (will be created if None)
            
        Returns:
            np.ndarray: Result of multi-scalar multiplication
        """
        if not self.initialized:
            raise RuntimeError("GPU acceleration manager not initialized")
        
        if len(scalars) != len(points):
            raise ValueError("Number of scalars must match number of points")
        
        if result is None:
            result = np.zeros_like(points[0])
        
        start_time = time.time()
        operation = "multi_scalar_mul"
        
        try:
            provider = self.compute_manager.get_provider()
            success = provider.zk_multi_scalar_mul(scalars, points, result)
            
            if not success and self.config.fallback_to_cpu:
                # Fallback to CPU operations
                logger.warning("GPU multi-scalar mul failed, falling back to CPU")
                result.fill(0)
                for scalar, point in zip(scalars, points):
                    temp = np.multiply(scalar, point, dtype=result.dtype)
                    np.add(result, temp, out=result, dtype=result.dtype)
                success = True
            
            if success:
                self._update_stats(operation, time.time() - start_time, False)
                return result
            else:
                self._update_stats(operation, time.time() - start_time, True)
                raise RuntimeError("Multi-scalar multiplication failed")
                
        except Exception as e:
            self._update_stats(operation, time.time() - start_time, True)
            logger.error(f"Multi-scalar multiplication failed: {e}")
            raise
    
    def pairing(self, p1: np.ndarray, p2: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Perform pairing operation with automatic backend selection.
        
        Args:
            p1: First point
            p2: Second point
            result: Optional result array (will be created if None)
            
        Returns:
            np.ndarray: Result of pairing operation
        """
        if not self.initialized:
            raise RuntimeError("GPU acceleration manager not initialized")
        
        if result is None:
            result = np.zeros_like(p1)
        
        start_time = time.time()
        operation = "pairing"
        
        try:
            provider = self.compute_manager.get_provider()
            success = provider.zk_pairing(p1, p2, result)
            
            if not success and self.config.fallback_to_cpu:
                # Fallback to CPU operations
                logger.warning("GPU pairing failed, falling back to CPU")
                np.multiply(p1, p2, out=result, dtype=result.dtype)
                success = True
            
            if success:
                self._update_stats(operation, time.time() - start_time, False)
                return result
            else:
                self._update_stats(operation, time.time() - start_time, True)
                raise RuntimeError("Pairing operation failed")
                
        except Exception as e:
            self._update_stats(operation, time.time() - start_time, True)
            logger.error(f"Pairing operation failed: {e}")
            raise
    
    # Batch operations
    
    def batch_field_add(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
        """
        Perform batch field addition.
        
        Args:
            operands: List of (a, b) tuples
            
        Returns:
            List[np.ndarray]: List of results
        """
        results = []
        for a, b in operands:
            result = self.field_add(a, b)
            results.append(result)
        return results
    
    def batch_field_mul(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
        """
        Perform batch field multiplication.
        
        Args:
            operands: List of (a, b) tuples
            
        Returns:
            List[np.ndarray]: List of results
        """
        results = []
        for a, b in operands:
            result = self.field_mul(a, b)
            results.append(result)
        return results
    
    # Performance and monitoring
    
    def benchmark_all_operations(self, iterations: int = 100) -> Dict[str, Dict[str, float]]:
        """Benchmark all supported operations."""
        if not self.initialized:
            return {"error": "Manager not initialized"}
        
        results = {}
        provider = self.compute_manager.get_provider()
        
        operations = ["add", "mul", "inverse", "multi_scalar_mul", "pairing"]
        for op in operations:
            try:
                results[op] = provider.benchmark_operation(op, iterations)
            except Exception as e:
                results[op] = {"error": str(e)}
        
        return results
    
    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get comprehensive performance metrics."""
        if not self.initialized:
            return {"error": "Manager not initialized"}
        
        # Get provider metrics
        provider_metrics = self.compute_manager.get_provider().get_performance_metrics()
        
        # Add operation statistics
        operation_stats = {}
        for op, stats in self.operation_stats.items():
            if stats["count"] > 0:
                operation_stats[op] = {
                    "count": stats["count"],
                    "total_time": stats["total_time"],
                    "average_time": stats["total_time"] / stats["count"],
                    "error_rate": stats["errors"] / stats["count"],
                    "operations_per_second": stats["count"] / stats["total_time"] if stats["total_time"] > 0 else 0
                }
        
        return {
            "backend": provider_metrics,
            "operations": operation_stats,
            "manager": {
                "initialized": self.initialized,
                "config": {
                    "batch_size": self.config.batch_size,
                    "use_gpu": self.config.use_gpu,
                    "fallback_to_cpu": self.config.fallback_to_cpu,
                    "timeout": self.config.timeout
                }
            }
        }
    
    def _update_stats(self, operation: str, execution_time: float, error: bool):
        """Update operation statistics."""
        if operation in self.operation_stats:
            self.operation_stats[operation]["count"] += 1
            self.operation_stats[operation]["total_time"] += execution_time
            if error:
                self.operation_stats[operation]["errors"] += 1
    
    def reset_stats(self):
        """Reset operation statistics."""
        for stats in self.operation_stats.values():
            stats["count"] = 0
            stats["total_time"] = 0.0
            stats["errors"] = 0


# Convenience functions for easy usage

def create_gpu_manager(backend: Optional[str] = None, **config_kwargs) -> GPUAccelerationManager:
    """
    Create a GPU acceleration manager with optional backend specification.
    
    Args:
        backend: Backend name ('cuda', 'apple_silicon', 'cpu', or None for auto-detection)
        **config_kwargs: Additional configuration parameters
        
    Returns:
        GPUAccelerationManager: Configured manager instance
    """
    backend_enum = None
    if backend:
        try:
            backend_enum = ComputeBackend(backend)
        except ValueError:
            logger.warning(f"Unknown backend '{backend}', using auto-detection")
    
    config = ZKOperationConfig(**config_kwargs)
    manager = GPUAccelerationManager(backend_enum, config)
    
    if not manager.initialize():
        raise RuntimeError("Failed to initialize GPU acceleration manager")
    
    return manager


def get_available_backends() -> List[str]:
    """Get list of available compute backends."""
    from .compute_provider import ComputeProviderFactory
    backends = ComputeProviderFactory.get_available_backends()
    return [backend.value for backend in backends]


def auto_detect_best_backend() -> str:
    """Auto-detect the best available backend."""
    from .compute_provider import ComputeProviderFactory
    backend = ComputeProviderFactory.auto_detect_backend()
    return backend.value


# Context manager for easy resource management

class GPUAccelerationContext:
    """Context manager for GPU acceleration."""
    
    def __init__(self, backend: Optional[str] = None, **config_kwargs):
        self.backend = backend
        self.config_kwargs = config_kwargs
        self.manager = None
    
    def __enter__(self) -> GPUAccelerationManager:
        self.manager = create_gpu_manager(self.backend, **self.config_kwargs)
        return self.manager
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.manager:
            self.manager.shutdown()


# Usage example:
# with GPUAccelerationContext() as gpu:
#     result = gpu.field_add(a, b)
#     metrics = gpu.get_performance_metrics()