- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration - Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage - Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
517 lines
18 KiB
Python
517 lines
18 KiB
Python
"""
|
|
Unified GPU Acceleration Manager
|
|
|
|
This module provides a high-level interface for GPU acceleration
|
|
that automatically selects the best available backend and provides
|
|
a unified API for ZK operations.
|
|
"""
|
|
|
|
import numpy as np
|
|
from typing import Dict, List, Optional, Any, Tuple, Union
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass
|
|
|
|
from .compute_provider import (
|
|
ComputeManager, ComputeBackend, ComputeDevice,
|
|
ComputeTask, ComputeResult
|
|
)
|
|
from .cuda_provider import CUDAComputeProvider
|
|
from .cpu_provider import CPUComputeProvider
|
|
from .apple_silicon_provider import AppleSiliconComputeProvider
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ZKOperationConfig:
    """
    Settings bundle handed to the acceleration manager for ZK operations.

    Attributes:
        batch_size: Batch size setting (default 1024).
        use_gpu: Whether GPU execution is requested (default True).
        fallback_to_cpu: Whether a failed provider call may be retried with
            the manager's NumPy fallback (default True).
        timeout: Timeout setting (default 30.0); presumably seconds, the
            unit is not established in this file.
        memory_limit: Optional memory limit in bytes (default None).
    """

    batch_size: int = 1024
    use_gpu: bool = True
    fallback_to_cpu: bool = True
    timeout: float = 30.0
    memory_limit: Optional[int] = None  # in bytes
|
|
|
|
|
|
class GPUAccelerationManager:
|
|
"""
|
|
High-level manager for GPU acceleration with automatic backend selection.
|
|
|
|
This class provides a clean interface for ZK operations that automatically
|
|
selects the best available compute backend (CUDA, Apple Silicon, CPU).
|
|
"""
|
|
|
|
def __init__(self, backend: Optional[ComputeBackend] = None, config: Optional[ZKOperationConfig] = None):
    """
    Build a manager that still has to be started with ``initialize()``.

    Args:
        backend: Backend handed to ComputeManager, or None for auto-detection
        config: Operation settings; a default ZKOperationConfig is used when omitted
    """
    self.config = config if config else ZKOperationConfig()
    self.compute_manager = ComputeManager(backend)
    self.initialized = False
    self.backend_info = {}

    # Per-operation counters; every tracked operation starts from zero
    tracked_operations = (
        "field_add",
        "field_mul",
        "field_inverse",
        "multi_scalar_mul",
        "pairing",
    )
    self.operation_stats = {
        name: {"count": 0, "total_time": 0.0, "errors": 0}
        for name in tracked_operations
    }
|
|
|
|
def initialize(self) -> bool:
    """
    Initialize the compute manager and record information about its backend.

    The manager is marked as initialized only after every step has
    succeeded, so a call that returns False never flips
    ``self.initialized`` to True.

    Returns:
        bool: True when the compute manager initialized and its backend and
        device information could be read; False when the compute manager
        reported failure or any step raised (the error is logged, not
        propagated).
    """
    try:
        if not self.compute_manager.initialize():
            logger.error("Failed to initialize GPU acceleration manager")
            return False

        backend_info = self.compute_manager.get_backend_info()
        logger.info(f"GPU Acceleration Manager initialized with {backend_info['backend']} backend")

        # Log device information
        devices = self.compute_manager.get_provider().get_available_devices()
        for device in devices:
            logger.info(f" Device {device.device_id}: {device.name} ({device.backend.value})")

    except Exception as e:
        logger.error(f"GPU acceleration manager initialization failed: {e}")
        return False

    # Commit state last so the return value and self.initialized always agree.
    self.backend_info = backend_info
    self.initialized = True
    return True
|
|
|
|
def shutdown(self) -> None:
    """
    Shut down the compute manager and mark this manager as not initialized.

    A failure raised by the compute manager is logged and swallowed; in that
    case the ``initialized`` flag is left as it was.
    """
    try:
        self.compute_manager.shutdown()
    except Exception as exc:
        logger.error(f"GPU acceleration manager shutdown failed: {exc}")
    else:
        self.initialized = False
        logger.info("GPU Acceleration Manager shutdown complete")
|
|
|
|
def get_backend_info(self) -> Dict[str, Any]:
    """Return the stored backend information, or an error entry before initialization."""
    if not self.initialized:
        return {"error": "Manager not initialized"}
    return self.backend_info
|
|
|
|
def get_available_devices(self) -> List[ComputeDevice]:
    """Return the active provider's device list, or an empty list before initialization."""
    if not self.initialized:
        return []
    active_provider = self.compute_manager.get_provider()
    return active_provider.get_available_devices()
|
|
|
|
def set_device(self, device_id: int) -> bool:
    """
    Ask the active provider to switch to the given device.

    Args:
        device_id: Identifier forwarded to the provider's ``set_device``

    Returns:
        bool: The provider's result, or False when the manager is not initialized
    """
    if not self.initialized:
        return False
    active_provider = self.compute_manager.get_provider()
    return active_provider.set_device(device_id)
|
|
|
|
# High-level ZK operations with automatic fallback
|
|
|
|
def field_add(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
    """
    Perform field addition with automatic backend selection.

    The active provider's ``zk_field_add`` is tried first. If it reports
    failure and ``config.fallback_to_cpu`` is set, the result is computed
    with ``np.add`` instead. Every call is recorded exactly once in the
    operation statistics, either as a success or as an error.

    Args:
        a: First operand
        b: Second operand
        result: Optional result array (will be created if None)

    Returns:
        np.ndarray: Result of field addition

    Raises:
        RuntimeError: If the manager is not initialized, or the provider
            reports failure and CPU fallback is disabled.
        Exception: Anything raised by the provider or the fallback is
            logged and re-raised unchanged.
    """
    if not self.initialized:
        raise RuntimeError("GPU acceleration manager not initialized")

    if result is None:
        result = np.zeros_like(a)

    start_time = time.time()
    operation = "field_add"

    try:
        provider = self.compute_manager.get_provider()
        success = provider.zk_field_add(a, b, result)

        if not success and self.config.fallback_to_cpu:
            # Fallback to CPU operations
            # NOTE(review): plain element-wise np.add, no modular reduction
            # is applied here - confirm this matches the provider's result.
            logger.warning("GPU field add failed, falling back to CPU")
            np.add(a, b, out=result, dtype=result.dtype)
            success = True

        if not success:
            # Raised inside the try so the handler below is the single
            # place where a failure is counted and logged.
            raise RuntimeError("Field addition failed")

    except Exception as e:
        self._update_stats(operation, time.time() - start_time, True)
        logger.error(f"Field addition failed: {e}")
        raise

    self._update_stats(operation, time.time() - start_time, False)
    return result
|
|
|
|
def field_mul(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
    """
    Perform field multiplication with automatic backend selection.

    The active provider's ``zk_field_mul`` is tried first. If it reports
    failure and ``config.fallback_to_cpu`` is set, the result is computed
    with ``np.multiply`` instead. Every call is recorded exactly once in
    the operation statistics, either as a success or as an error.

    Args:
        a: First operand
        b: Second operand
        result: Optional result array (will be created if None)

    Returns:
        np.ndarray: Result of field multiplication

    Raises:
        RuntimeError: If the manager is not initialized, or the provider
            reports failure and CPU fallback is disabled.
        Exception: Anything raised by the provider or the fallback is
            logged and re-raised unchanged.
    """
    if not self.initialized:
        raise RuntimeError("GPU acceleration manager not initialized")

    if result is None:
        result = np.zeros_like(a)

    start_time = time.time()
    operation = "field_mul"

    try:
        provider = self.compute_manager.get_provider()
        success = provider.zk_field_mul(a, b, result)

        if not success and self.config.fallback_to_cpu:
            # Fallback to CPU operations
            # NOTE(review): plain element-wise np.multiply, no modular
            # reduction is applied here - confirm this matches the provider.
            logger.warning("GPU field mul failed, falling back to CPU")
            np.multiply(a, b, out=result, dtype=result.dtype)
            success = True

        if not success:
            # Raised inside the try so the handler below is the single
            # place where a failure is counted and logged.
            raise RuntimeError("Field multiplication failed")

    except Exception as e:
        self._update_stats(operation, time.time() - start_time, True)
        logger.error(f"Field multiplication failed: {e}")
        raise

    self._update_stats(operation, time.time() - start_time, False)
    return result
|
|
|
|
def field_inverse(self, a: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
    """
    Perform field inversion with automatic backend selection.

    The active provider's ``zk_field_inverse`` is tried first. If it reports
    failure and ``config.fallback_to_cpu`` is set, a simplified CPU
    placeholder is used: every non-zero element maps to 1 and every zero
    element maps to 0. Every call is recorded exactly once in the operation
    statistics, either as a success or as an error.

    Args:
        a: Operand to invert
        result: Optional result array (will be created if None)

    Returns:
        np.ndarray: Result of field inversion

    Raises:
        RuntimeError: If the manager is not initialized, or the provider
            reports failure and CPU fallback is disabled.
        Exception: Anything raised by the provider or the fallback is
            logged and re-raised unchanged.
    """
    if not self.initialized:
        raise RuntimeError("GPU acceleration manager not initialized")

    if result is None:
        result = np.zeros_like(a)

    start_time = time.time()
    operation = "field_inverse"

    try:
        provider = self.compute_manager.get_provider()
        success = provider.zk_field_inverse(a, result)

        if not success and self.config.fallback_to_cpu:
            # Fallback to CPU operations
            logger.warning("GPU field inverse failed, falling back to CPU")
            # NOTE(review): simplified placeholder, NOT a modular inverse.
            # It only marks which elements are non-zero; a real fallback
            # needs the field modulus, which is not available here.
            result[...] = (np.asarray(a) != 0)
            success = True

        if not success:
            # Raised inside the try so the handler below is the single
            # place where a failure is counted and logged.
            raise RuntimeError("Field inversion failed")

    except Exception as e:
        self._update_stats(operation, time.time() - start_time, True)
        logger.error(f"Field inversion failed: {e}")
        raise

    self._update_stats(operation, time.time() - start_time, False)
    return result
|
|
|
|
def multi_scalar_mul(
    self,
    scalars: List[np.ndarray],
    points: List[np.ndarray],
    result: Optional[np.ndarray] = None
) -> np.ndarray:
    """
    Perform multi-scalar multiplication with automatic backend selection.

    The active provider's ``zk_multi_scalar_mul`` is tried first. If it
    reports failure and ``config.fallback_to_cpu`` is set, the result is
    computed on the CPU as the sum of element-wise ``scalar * point``
    products. Every call that reaches the provider is recorded exactly once
    in the operation statistics, either as a success or as an error.

    Args:
        scalars: List of scalar operands
        points: List of point operands
        result: Optional result array (will be created if None, shaped like
            the first point)

    Returns:
        np.ndarray: Result of multi-scalar multiplication

    Raises:
        RuntimeError: If the manager is not initialized, or the provider
            reports failure and CPU fallback is disabled.
        ValueError: If the number of scalars and points differ, or both are
            empty and no result array was supplied to define the shape.
        Exception: Anything raised by the provider or the fallback is
            logged and re-raised unchanged.
    """
    if not self.initialized:
        raise RuntimeError("GPU acceleration manager not initialized")

    if len(scalars) != len(points):
        raise ValueError("Number of scalars must match number of points")

    if result is None:
        if len(points) == 0:
            # Without a point there is nothing to derive the output shape from.
            raise ValueError("Cannot create a result array from empty inputs; pass a result array")
        result = np.zeros_like(points[0])

    start_time = time.time()
    operation = "multi_scalar_mul"

    try:
        provider = self.compute_manager.get_provider()
        success = provider.zk_multi_scalar_mul(scalars, points, result)

        if not success and self.config.fallback_to_cpu:
            # Fallback to CPU operations
            # NOTE(review): element-wise multiply-accumulate, not elliptic
            # curve arithmetic - confirm this matches the provider's result.
            logger.warning("GPU multi-scalar mul failed, falling back to CPU")
            result.fill(0)
            for scalar, point in zip(scalars, points):
                temp = np.multiply(scalar, point, dtype=result.dtype)
                np.add(result, temp, out=result, dtype=result.dtype)
            success = True

        if not success:
            # Raised inside the try so the handler below is the single
            # place where a failure is counted and logged.
            raise RuntimeError("Multi-scalar multiplication failed")

    except Exception as e:
        self._update_stats(operation, time.time() - start_time, True)
        logger.error(f"Multi-scalar multiplication failed: {e}")
        raise

    self._update_stats(operation, time.time() - start_time, False)
    return result
|
|
|
|
def pairing(self, p1: np.ndarray, p2: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
    """
    Perform pairing operation with automatic backend selection.

    The active provider's ``zk_pairing`` is tried first. If it reports
    failure and ``config.fallback_to_cpu`` is set, the CPU fallback writes
    the element-wise product of the two inputs into the result. Every call
    is recorded exactly once in the operation statistics, either as a
    success or as an error.

    Args:
        p1: First point
        p2: Second point
        result: Optional result array (will be created if None)

    Returns:
        np.ndarray: Result of pairing operation

    Raises:
        RuntimeError: If the manager is not initialized, or the provider
            reports failure and CPU fallback is disabled.
        Exception: Anything raised by the provider or the fallback is
            logged and re-raised unchanged.
    """
    if not self.initialized:
        raise RuntimeError("GPU acceleration manager not initialized")

    if result is None:
        result = np.zeros_like(p1)

    start_time = time.time()
    operation = "pairing"

    try:
        provider = self.compute_manager.get_provider()
        success = provider.zk_pairing(p1, p2, result)

        if not success and self.config.fallback_to_cpu:
            # Fallback to CPU operations
            # NOTE(review): element-wise product used as a stand-in; this
            # is not a bilinear pairing - confirm callers can accept it.
            logger.warning("GPU pairing failed, falling back to CPU")
            np.multiply(p1, p2, out=result, dtype=result.dtype)
            success = True

        if not success:
            # Raised inside the try so the handler below is the single
            # place where a failure is counted and logged.
            raise RuntimeError("Pairing operation failed")

    except Exception as e:
        self._update_stats(operation, time.time() - start_time, True)
        logger.error(f"Pairing operation failed: {e}")
        raise

    self._update_stats(operation, time.time() - start_time, False)
    return result
|
|
|
|
# Batch operations
|
|
|
|
def batch_field_add(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
    """
    Run ``field_add`` on each operand pair, in order.

    Args:
        operands: List of (a, b) tuples

    Returns:
        List[np.ndarray]: One result per pair, in input order
    """
    return [self.field_add(lhs, rhs) for lhs, rhs in operands]
|
|
|
|
def batch_field_mul(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
    """
    Run ``field_mul`` on each operand pair, in order.

    Args:
        operands: List of (a, b) tuples

    Returns:
        List[np.ndarray]: One result per pair, in input order
    """
    return [self.field_mul(lhs, rhs) for lhs, rhs in operands]
|
|
|
|
# Performance and monitoring
|
|
|
|
def benchmark_all_operations(self, iterations: int = 100) -> Dict[str, Dict[str, float]]:
    """
    Ask the active provider to benchmark each supported operation.

    Args:
        iterations: Iteration count forwarded to the provider's ``benchmark_operation``

    Returns:
        A mapping from operation name to the provider's benchmark result.
        An operation whose benchmark raises is reported as
        ``{"error": <message>}``. When the manager is not initialized a
        single ``{"error": ...}`` mapping is returned instead.
    """
    if not self.initialized:
        return {"error": "Manager not initialized"}

    report = {}
    provider = self.compute_manager.get_provider()

    for name in ("add", "mul", "inverse", "multi_scalar_mul", "pairing"):
        try:
            outcome = provider.benchmark_operation(name, iterations)
        except Exception as exc:
            # One failing benchmark must not hide the others
            outcome = {"error": str(exc)}
        report[name] = outcome

    return report
|
|
|
|
def get_performance_metrics(self) -> Dict[str, Any]:
    """
    Collect provider metrics, per-operation statistics and manager settings.

    Returns:
        A dict with three entries: ``"backend"`` (the provider's own
        metrics), ``"operations"`` (derived figures for every operation
        that has been recorded at least once) and ``"manager"`` (the
        initialized flag plus selected config values). When the manager is
        not initialized, ``{"error": "Manager not initialized"}`` is
        returned instead.
    """
    if not self.initialized:
        return {"error": "Manager not initialized"}

    # Get provider metrics
    backend_metrics = self.compute_manager.get_provider().get_performance_metrics()

    # Derive per-operation figures, skipping operations that never ran
    summary = {}
    for name, entry in self.operation_stats.items():
        calls = entry["count"]
        if not calls > 0:
            continue
        elapsed = entry["total_time"]
        # Guard the throughput division: a zero total time reports 0
        throughput = calls / elapsed if elapsed > 0 else 0
        summary[name] = {
            "count": calls,
            "total_time": elapsed,
            "average_time": elapsed / calls,
            "error_rate": entry["errors"] / calls,
            "operations_per_second": throughput,
        }

    settings = self.config
    manager_section = {
        "initialized": self.initialized,
        "config": {
            "batch_size": settings.batch_size,
            "use_gpu": settings.use_gpu,
            "fallback_to_cpu": settings.fallback_to_cpu,
            "timeout": settings.timeout,
        },
    }

    return {
        "backend": backend_metrics,
        "operations": summary,
        "manager": manager_section,
    }
|
|
|
|
def _update_stats(self, operation: str, execution_time: float, error: bool):
|
|
"""Update operation statistics."""
|
|
if operation in self.operation_stats:
|
|
self.operation_stats[operation]["count"] += 1
|
|
self.operation_stats[operation]["total_time"] += execution_time
|
|
if error:
|
|
self.operation_stats[operation]["errors"] += 1
|
|
|
|
def reset_stats(self):
    """Zero the count, total time and error counters of every tracked operation."""
    for entry in self.operation_stats.values():
        entry.update(count=0, total_time=0.0, errors=0)
|
|
|
|
|
|
# Convenience functions for easy usage
|
|
|
|
def create_gpu_manager(backend: Optional[str] = None, **config_kwargs) -> GPUAccelerationManager:
    """
    Build and initialize a GPU acceleration manager.

    Args:
        backend: Backend name ('cuda', 'apple_silicon', 'cpu', or None for auto-detection).
            A name that ComputeBackend rejects is logged and treated like None.
        **config_kwargs: Keyword arguments forwarded to ZKOperationConfig

    Returns:
        GPUAccelerationManager: Manager instance whose ``initialize()`` succeeded

    Raises:
        RuntimeError: If the manager's ``initialize()`` returns False
    """
    selected_backend = None
    if backend:
        try:
            selected_backend = ComputeBackend(backend)
        except ValueError:
            logger.warning(f"Unknown backend '{backend}', using auto-detection")

    manager = GPUAccelerationManager(selected_backend, ZKOperationConfig(**config_kwargs))
    if manager.initialize():
        return manager

    raise RuntimeError("Failed to initialize GPU acceleration manager")
|
|
|
|
|
|
def get_available_backends() -> List[str]:
    """Return the ``value`` of every backend the provider factory reports as available."""
    from .compute_provider import ComputeProviderFactory

    return [entry.value for entry in ComputeProviderFactory.get_available_backends()]
|
|
|
|
|
|
def auto_detect_best_backend() -> str:
    """Return the ``value`` of the backend chosen by the provider factory's auto-detection."""
    from .compute_provider import ComputeProviderFactory

    return ComputeProviderFactory.auto_detect_backend().value
|
|
|
|
|
|
# Context manager for easy resource management
|
|
|
|
class GPUAccelerationContext:
    """
    Context manager that creates a GPU manager on entry and shuts it down on exit.

    The manager is built with ``create_gpu_manager`` from the backend name
    and config keyword arguments given to the constructor.
    """

    def __init__(self, backend: Optional[str] = None, **config_kwargs):
        # Nothing is created yet; the manager is built in __enter__
        self.manager = None
        self.backend = backend
        self.config_kwargs = config_kwargs

    def __enter__(self) -> GPUAccelerationManager:
        manager = create_gpu_manager(self.backend, **self.config_kwargs)
        self.manager = manager
        return manager

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Entry may have failed before a manager existed
        if not self.manager:
            return
        self.manager.shutdown()
|
|
|
|
|
|
# Usage example:
|
|
# with GPUAccelerationContext() as gpu:
|
|
# result = gpu.field_add(a, b)
|
|
# metrics = gpu.get_performance_metrics()
|