Files
aitbc/gpu_acceleration/gpu_manager.py
oib f353e00172 chore(security): enhance environment configuration, CI workflows, and wallet daemon with security improvements
- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration
- Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage
- Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
2026-03-03 10:33:46 +01:00

517 lines
18 KiB
Python

"""
Unified GPU Acceleration Manager
This module provides a high-level interface for GPU acceleration
that automatically selects the best available backend and provides
a unified API for ZK operations.
"""
import numpy as np
from typing import Dict, List, Optional, Any, Tuple, Union
import logging
import time
from dataclasses import dataclass
from .compute_provider import (
ComputeManager, ComputeBackend, ComputeDevice,
ComputeTask, ComputeResult
)
from .cuda_provider import CUDAComputeProvider
from .cpu_provider import CPUComputeProvider
from .apple_silicon_provider import AppleSiliconComputeProvider
# Configure logging
logger = logging.getLogger(__name__)
@dataclass
class ZKOperationConfig:
"""Configuration for ZK operations."""
batch_size: int = 1024
use_gpu: bool = True
fallback_to_cpu: bool = True
timeout: float = 30.0
memory_limit: Optional[int] = None # in bytes
class GPUAccelerationManager:
"""
High-level manager for GPU acceleration with automatic backend selection.
This class provides a clean interface for ZK operations that automatically
selects the best available compute backend (CUDA, Apple Silicon, CPU).
"""
def __init__(self, backend: Optional[ComputeBackend] = None, config: Optional[ZKOperationConfig] = None):
"""
Initialize the GPU acceleration manager.
Args:
backend: Specific backend to use, or None for auto-detection
config: Configuration for ZK operations
"""
self.config = config or ZKOperationConfig()
self.compute_manager = ComputeManager(backend)
self.initialized = False
self.backend_info = {}
# Performance tracking
self.operation_stats = {
"field_add": {"count": 0, "total_time": 0.0, "errors": 0},
"field_mul": {"count": 0, "total_time": 0.0, "errors": 0},
"field_inverse": {"count": 0, "total_time": 0.0, "errors": 0},
"multi_scalar_mul": {"count": 0, "total_time": 0.0, "errors": 0},
"pairing": {"count": 0, "total_time": 0.0, "errors": 0}
}
def initialize(self) -> bool:
"""Initialize the GPU acceleration manager."""
try:
success = self.compute_manager.initialize()
if success:
self.initialized = True
self.backend_info = self.compute_manager.get_backend_info()
logger.info(f"GPU Acceleration Manager initialized with {self.backend_info['backend']} backend")
# Log device information
devices = self.compute_manager.get_provider().get_available_devices()
for device in devices:
logger.info(f" Device {device.device_id}: {device.name} ({device.backend.value})")
return True
else:
logger.error("Failed to initialize GPU acceleration manager")
return False
except Exception as e:
logger.error(f"GPU acceleration manager initialization failed: {e}")
return False
def shutdown(self) -> None:
"""Shutdown the GPU acceleration manager."""
try:
self.compute_manager.shutdown()
self.initialized = False
logger.info("GPU Acceleration Manager shutdown complete")
except Exception as e:
logger.error(f"GPU acceleration manager shutdown failed: {e}")
def get_backend_info(self) -> Dict[str, Any]:
"""Get information about the current backend."""
if self.initialized:
return self.backend_info
return {"error": "Manager not initialized"}
def get_available_devices(self) -> List[ComputeDevice]:
"""Get list of available compute devices."""
if self.initialized:
return self.compute_manager.get_provider().get_available_devices()
return []
def set_device(self, device_id: int) -> bool:
"""Set the active compute device."""
if self.initialized:
return self.compute_manager.get_provider().set_device(device_id)
return False
# High-level ZK operations with automatic fallback
def field_add(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
"""
Perform field addition with automatic backend selection.
Args:
a: First operand
b: Second operand
result: Optional result array (will be created if None)
Returns:
np.ndarray: Result of field addition
"""
if not self.initialized:
raise RuntimeError("GPU acceleration manager not initialized")
if result is None:
result = np.zeros_like(a)
start_time = time.time()
operation = "field_add"
try:
provider = self.compute_manager.get_provider()
success = provider.zk_field_add(a, b, result)
if not success and self.config.fallback_to_cpu:
# Fallback to CPU operations
logger.warning("GPU field add failed, falling back to CPU")
np.add(a, b, out=result, dtype=result.dtype)
success = True
if success:
self._update_stats(operation, time.time() - start_time, False)
return result
else:
self._update_stats(operation, time.time() - start_time, True)
raise RuntimeError("Field addition failed")
except Exception as e:
self._update_stats(operation, time.time() - start_time, True)
logger.error(f"Field addition failed: {e}")
raise
def field_mul(self, a: np.ndarray, b: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
"""
Perform field multiplication with automatic backend selection.
Args:
a: First operand
b: Second operand
result: Optional result array (will be created if None)
Returns:
np.ndarray: Result of field multiplication
"""
if not self.initialized:
raise RuntimeError("GPU acceleration manager not initialized")
if result is None:
result = np.zeros_like(a)
start_time = time.time()
operation = "field_mul"
try:
provider = self.compute_manager.get_provider()
success = provider.zk_field_mul(a, b, result)
if not success and self.config.fallback_to_cpu:
# Fallback to CPU operations
logger.warning("GPU field mul failed, falling back to CPU")
np.multiply(a, b, out=result, dtype=result.dtype)
success = True
if success:
self._update_stats(operation, time.time() - start_time, False)
return result
else:
self._update_stats(operation, time.time() - start_time, True)
raise RuntimeError("Field multiplication failed")
except Exception as e:
self._update_stats(operation, time.time() - start_time, True)
logger.error(f"Field multiplication failed: {e}")
raise
def field_inverse(self, a: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
"""
Perform field inversion with automatic backend selection.
Args:
a: Operand to invert
result: Optional result array (will be created if None)
Returns:
np.ndarray: Result of field inversion
"""
if not self.initialized:
raise RuntimeError("GPU acceleration manager not initialized")
if result is None:
result = np.zeros_like(a)
start_time = time.time()
operation = "field_inverse"
try:
provider = self.compute_manager.get_provider()
success = provider.zk_field_inverse(a, result)
if not success and self.config.fallback_to_cpu:
# Fallback to CPU operations
logger.warning("GPU field inverse failed, falling back to CPU")
for i in range(len(a)):
if a[i] != 0:
result[i] = 1 # Simplified
else:
result[i] = 0
success = True
if success:
self._update_stats(operation, time.time() - start_time, False)
return result
else:
self._update_stats(operation, time.time() - start_time, True)
raise RuntimeError("Field inversion failed")
except Exception as e:
self._update_stats(operation, time.time() - start_time, True)
logger.error(f"Field inversion failed: {e}")
raise
def multi_scalar_mul(
self,
scalars: List[np.ndarray],
points: List[np.ndarray],
result: Optional[np.ndarray] = None
) -> np.ndarray:
"""
Perform multi-scalar multiplication with automatic backend selection.
Args:
scalars: List of scalar operands
points: List of point operands
result: Optional result array (will be created if None)
Returns:
np.ndarray: Result of multi-scalar multiplication
"""
if not self.initialized:
raise RuntimeError("GPU acceleration manager not initialized")
if len(scalars) != len(points):
raise ValueError("Number of scalars must match number of points")
if result is None:
result = np.zeros_like(points[0])
start_time = time.time()
operation = "multi_scalar_mul"
try:
provider = self.compute_manager.get_provider()
success = provider.zk_multi_scalar_mul(scalars, points, result)
if not success and self.config.fallback_to_cpu:
# Fallback to CPU operations
logger.warning("GPU multi-scalar mul failed, falling back to CPU")
result.fill(0)
for scalar, point in zip(scalars, points):
temp = np.multiply(scalar, point, dtype=result.dtype)
np.add(result, temp, out=result, dtype=result.dtype)
success = True
if success:
self._update_stats(operation, time.time() - start_time, False)
return result
else:
self._update_stats(operation, time.time() - start_time, True)
raise RuntimeError("Multi-scalar multiplication failed")
except Exception as e:
self._update_stats(operation, time.time() - start_time, True)
logger.error(f"Multi-scalar multiplication failed: {e}")
raise
def pairing(self, p1: np.ndarray, p2: np.ndarray, result: Optional[np.ndarray] = None) -> np.ndarray:
"""
Perform pairing operation with automatic backend selection.
Args:
p1: First point
p2: Second point
result: Optional result array (will be created if None)
Returns:
np.ndarray: Result of pairing operation
"""
if not self.initialized:
raise RuntimeError("GPU acceleration manager not initialized")
if result is None:
result = np.zeros_like(p1)
start_time = time.time()
operation = "pairing"
try:
provider = self.compute_manager.get_provider()
success = provider.zk_pairing(p1, p2, result)
if not success and self.config.fallback_to_cpu:
# Fallback to CPU operations
logger.warning("GPU pairing failed, falling back to CPU")
np.multiply(p1, p2, out=result, dtype=result.dtype)
success = True
if success:
self._update_stats(operation, time.time() - start_time, False)
return result
else:
self._update_stats(operation, time.time() - start_time, True)
raise RuntimeError("Pairing operation failed")
except Exception as e:
self._update_stats(operation, time.time() - start_time, True)
logger.error(f"Pairing operation failed: {e}")
raise
# Batch operations
def batch_field_add(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
"""
Perform batch field addition.
Args:
operands: List of (a, b) tuples
Returns:
List[np.ndarray]: List of results
"""
results = []
for a, b in operands:
result = self.field_add(a, b)
results.append(result)
return results
def batch_field_mul(self, operands: List[Tuple[np.ndarray, np.ndarray]]) -> List[np.ndarray]:
"""
Perform batch field multiplication.
Args:
operands: List of (a, b) tuples
Returns:
List[np.ndarray]: List of results
"""
results = []
for a, b in operands:
result = self.field_mul(a, b)
results.append(result)
return results
# Performance and monitoring
def benchmark_all_operations(self, iterations: int = 100) -> Dict[str, Dict[str, float]]:
"""Benchmark all supported operations."""
if not self.initialized:
return {"error": "Manager not initialized"}
results = {}
provider = self.compute_manager.get_provider()
operations = ["add", "mul", "inverse", "multi_scalar_mul", "pairing"]
for op in operations:
try:
results[op] = provider.benchmark_operation(op, iterations)
except Exception as e:
results[op] = {"error": str(e)}
return results
def get_performance_metrics(self) -> Dict[str, Any]:
"""Get comprehensive performance metrics."""
if not self.initialized:
return {"error": "Manager not initialized"}
# Get provider metrics
provider_metrics = self.compute_manager.get_provider().get_performance_metrics()
# Add operation statistics
operation_stats = {}
for op, stats in self.operation_stats.items():
if stats["count"] > 0:
operation_stats[op] = {
"count": stats["count"],
"total_time": stats["total_time"],
"average_time": stats["total_time"] / stats["count"],
"error_rate": stats["errors"] / stats["count"],
"operations_per_second": stats["count"] / stats["total_time"] if stats["total_time"] > 0 else 0
}
return {
"backend": provider_metrics,
"operations": operation_stats,
"manager": {
"initialized": self.initialized,
"config": {
"batch_size": self.config.batch_size,
"use_gpu": self.config.use_gpu,
"fallback_to_cpu": self.config.fallback_to_cpu,
"timeout": self.config.timeout
}
}
}
def _update_stats(self, operation: str, execution_time: float, error: bool):
"""Update operation statistics."""
if operation in self.operation_stats:
self.operation_stats[operation]["count"] += 1
self.operation_stats[operation]["total_time"] += execution_time
if error:
self.operation_stats[operation]["errors"] += 1
def reset_stats(self):
"""Reset operation statistics."""
for stats in self.operation_stats.values():
stats["count"] = 0
stats["total_time"] = 0.0
stats["errors"] = 0
# Convenience functions for easy usage
def create_gpu_manager(backend: Optional[str] = None, **config_kwargs) -> GPUAccelerationManager:
"""
Create a GPU acceleration manager with optional backend specification.
Args:
backend: Backend name ('cuda', 'apple_silicon', 'cpu', or None for auto-detection)
**config_kwargs: Additional configuration parameters
Returns:
GPUAccelerationManager: Configured manager instance
"""
backend_enum = None
if backend:
try:
backend_enum = ComputeBackend(backend)
except ValueError:
logger.warning(f"Unknown backend '{backend}', using auto-detection")
config = ZKOperationConfig(**config_kwargs)
manager = GPUAccelerationManager(backend_enum, config)
if not manager.initialize():
raise RuntimeError("Failed to initialize GPU acceleration manager")
return manager
def get_available_backends() -> List[str]:
"""Get list of available compute backends."""
from .compute_provider import ComputeProviderFactory
backends = ComputeProviderFactory.get_available_backends()
return [backend.value for backend in backends]
def auto_detect_best_backend() -> str:
"""Auto-detect the best available backend."""
from .compute_provider import ComputeProviderFactory
backend = ComputeProviderFactory.auto_detect_backend()
return backend.value
# Context manager for easy resource management
class GPUAccelerationContext:
"""Context manager for GPU acceleration."""
def __init__(self, backend: Optional[str] = None, **config_kwargs):
self.backend = backend
self.config_kwargs = config_kwargs
self.manager = None
def __enter__(self) -> GPUAccelerationManager:
self.manager = create_gpu_manager(self.backend, **self.config_kwargs)
return self.manager
def __exit__(self, exc_type, exc_val, exc_tb):
if self.manager:
self.manager.shutdown()
# Usage example:
# with GPUAccelerationContext() as gpu:
# result = gpu.field_add(a, b)
# metrics = gpu.get_performance_metrics()