chore(security): enhance environment configuration, CI workflows, and wallet daemon with security improvements
- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration - Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage - Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
This commit is contained in:
466
gpu_acceleration/compute_provider.py
Normal file
466
gpu_acceleration/compute_provider.py
Normal file
@@ -0,0 +1,466 @@
|
||||
"""
|
||||
GPU Compute Provider Abstract Interface
|
||||
|
||||
This module defines the abstract interface for GPU compute providers,
|
||||
allowing different backends (CUDA, ROCm, Apple Silicon, CPU) to be
|
||||
swapped seamlessly without changing business logic.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ComputeBackend(Enum):
    """Enumeration of the compute backends a provider implementation can target."""

    CUDA = "cuda"                    # NVIDIA CUDA devices
    ROCM = "rocm"                    # AMD ROCm devices
    APPLE_SILICON = "apple_silicon"  # Apple M-series accelerators
    CPU = "cpu"                      # Portable CPU fallback
    OPENCL = "opencl"                # Generic OpenCL devices
|
||||
|
||||
|
||||
@dataclass
class ComputeDevice:
    """Static description plus a point-in-time status snapshot of one compute device."""

    device_id: int                            # backend-local ordinal identifying the device
    name: str                                 # human-readable device name
    backend: ComputeBackend                   # backend this device belongs to
    memory_total: int                         # total device memory, in bytes
    memory_available: int                     # currently free device memory, in bytes
    compute_capability: Optional[str] = None  # e.g. CUDA compute capability string, if known
    is_available: bool = True                 # False when the device cannot currently be used
    temperature: Optional[float] = None       # degrees Celsius; None when not reported
    utilization: Optional[float] = None       # percentage; None when not reported
|
||||
|
||||
|
||||
@dataclass
class ComputeTask:
    """Description of one unit of work to be submitted to a compute provider."""

    task_id: str                     # caller-supplied unique identifier
    operation: str                   # name of the operation to run
    data: Any                        # input payload; format is operation/backend specific
    parameters: Dict[str, Any]       # extra keyword-style options for the operation
    priority: int = 0                # scheduling hint; interpretation is provider-defined
    timeout: Optional[float] = None  # optional execution time limit — presumably seconds; confirm with providers
|
||||
|
||||
|
||||
@dataclass
class ComputeResult:
    """Outcome of executing a ComputeTask."""

    task_id: str                 # identifier of the task this result answers
    success: bool                # True when the task completed without error
    result: Any = None           # operation output; left as None on failure
    error: Optional[str] = None  # failure description when success is False
    execution_time: float = 0.0  # elapsed execution time — presumably seconds; confirm with providers
    memory_used: int = 0         # device memory consumed, in bytes
|
||||
|
||||
|
||||
class ComputeProvider(ABC):
    """
    Abstract contract implemented by every compute backend.

    Concrete subclasses (CUDA, ROCm, Apple Silicon, OpenCL, CPU, ...) supply
    the device management, memory movement, kernel execution, ZK primitives
    and monitoring declared here, so callers never depend on a specific
    backend and implementations can be swapped freely.
    """

    # ------------------------------------------------------------------ #
    # Lifecycle                                                          #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def initialize(self) -> bool:
        """Initialize the provider. Returns True on success, False otherwise."""
        ...

    @abstractmethod
    def shutdown(self) -> None:
        """Release every resource held by the provider."""
        ...

    # ------------------------------------------------------------------ #
    # Device management                                                  #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def get_available_devices(self) -> List[ComputeDevice]:
        """Return the list of compute devices this provider can use."""
        ...

    @abstractmethod
    def get_device_count(self) -> int:
        """Return how many devices are available."""
        ...

    @abstractmethod
    def set_device(self, device_id: int) -> bool:
        """Make *device_id* the active device. Returns True on success."""
        ...

    @abstractmethod
    def get_device_info(self, device_id: int) -> Optional[ComputeDevice]:
        """Return details for *device_id*, or None when no such device exists."""
        ...

    # ------------------------------------------------------------------ #
    # Memory management                                                  #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def allocate_memory(self, size: int, device_id: Optional[int] = None) -> Any:
        """
        Allocate device memory.

        Args:
            size: Number of bytes to allocate.
            device_id: Target device, or None for the current device.

        Returns:
            Any: An opaque, backend-specific memory handle or pointer.
        """
        ...

    @abstractmethod
    def free_memory(self, memory_handle: Any) -> None:
        """Release memory previously returned by allocate_memory()."""
        ...

    @abstractmethod
    def copy_to_device(self, host_data: Any, device_data: Any) -> None:
        """Copy *host_data* into the device destination *device_data*."""
        ...

    @abstractmethod
    def copy_to_host(self, device_data: Any, host_data: Any) -> None:
        """Copy the device source *device_data* into *host_data* on the host."""
        ...

    # ------------------------------------------------------------------ #
    # Kernel execution                                                   #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def execute_kernel(
        self,
        kernel_name: str,
        grid_size: Tuple[int, int, int],
        block_size: Tuple[int, int, int],
        args: List[Any],
        shared_memory: int = 0
    ) -> bool:
        """
        Execute a compute kernel.

        Args:
            kernel_name: Name of the kernel to execute.
            grid_size: Grid dimensions (x, y, z).
            block_size: Block dimensions (x, y, z).
            args: Kernel arguments.
            shared_memory: Shared memory size in bytes.

        Returns:
            bool: True when execution succeeded, False otherwise.
        """
        ...

    @abstractmethod
    def synchronize(self) -> None:
        """Block until all queued device operations have completed."""
        ...

    # ------------------------------------------------------------------ #
    # Monitoring                                                         #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def get_memory_info(self, device_id: Optional[int] = None) -> Tuple[int, int]:
        """Return (free_memory, total_memory) in bytes for a device (None = current)."""
        ...

    @abstractmethod
    def get_utilization(self, device_id: Optional[int] = None) -> float:
        """Return device utilization as a 0-100 percentage (None = current device)."""
        ...

    @abstractmethod
    def get_temperature(self, device_id: Optional[int] = None) -> Optional[float]:
        """Return device temperature in Celsius, or None when unavailable."""
        ...

    # ------------------------------------------------------------------ #
    # ZK-specific operations (implemented by specialized providers)      #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def zk_field_add(self, a: np.ndarray, b: np.ndarray, result: np.ndarray) -> bool:
        """Field addition of *a* and *b* written into *result*. Returns True on success."""
        ...

    @abstractmethod
    def zk_field_mul(self, a: np.ndarray, b: np.ndarray, result: np.ndarray) -> bool:
        """Field multiplication of *a* and *b* written into *result*. Returns True on success."""
        ...

    @abstractmethod
    def zk_field_inverse(self, a: np.ndarray, result: np.ndarray) -> bool:
        """Field inversion of *a* written into *result*. Returns True on success."""
        ...

    @abstractmethod
    def zk_multi_scalar_mul(
        self,
        scalars: List[np.ndarray],
        points: List[np.ndarray],
        result: np.ndarray
    ) -> bool:
        """
        Multi-scalar multiplication for ZK operations.

        Args:
            scalars: List of scalar operands.
            points: List of point operands.
            result: Result array.

        Returns:
            bool: True when the operation succeeded.
        """
        ...

    @abstractmethod
    def zk_pairing(self, p1: np.ndarray, p2: np.ndarray, result: np.ndarray) -> bool:
        """Pairing of points *p1* and *p2* written into *result*. Returns True on success."""
        ...

    # ------------------------------------------------------------------ #
    # Performance and benchmarking                                       #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def benchmark_operation(self, operation: str, iterations: int = 100) -> Dict[str, float]:
        """Run *operation* for *iterations* rounds and return its performance metrics."""
        ...

    @abstractmethod
    def get_performance_metrics(self) -> Dict[str, Any]:
        """Return accumulated performance metrics for this provider."""
        ...
|
||||
|
||||
|
||||
class ComputeProviderFactory:
    """Registry and factory for ComputeProvider implementations."""

    # Maps ComputeBackend -> provider class; populated via register_provider().
    _providers = {}

    @classmethod
    def register_provider(cls, backend: ComputeBackend, provider_class):
        """Associate *provider_class* with *backend* in the registry."""
        cls._providers[backend] = provider_class

    @classmethod
    def create_provider(cls, backend: ComputeBackend, **kwargs) -> ComputeProvider:
        """
        Instantiate the provider registered for *backend*.

        Args:
            backend: The compute backend to create.
            **kwargs: Forwarded to the provider class constructor.

        Returns:
            ComputeProvider: A new (not yet initialized) provider instance.

        Raises:
            ValueError: If no provider is registered for *backend*.
        """
        provider_class = cls._providers.get(backend)
        if provider_class is None:
            raise ValueError(f"Unsupported compute backend: {backend}")
        return provider_class(**kwargs)

    @classmethod
    def get_available_backends(cls) -> List[ComputeBackend]:
        """Return every backend that currently has a registered provider."""
        return [backend for backend in cls._providers]

    @classmethod
    def auto_detect_backend(cls) -> ComputeBackend:
        """
        Probe registered backends in preference order and return the first
        one whose provider initializes successfully.

        Returns:
            ComputeBackend: The detected backend, or CPU as the fallback.
        """
        # Fastest/most capable backends are tried first; CPU is last resort.
        preference = (
            ComputeBackend.CUDA,
            ComputeBackend.ROCM,
            ComputeBackend.APPLE_SILICON,
            ComputeBackend.OPENCL,
            ComputeBackend.CPU,
        )
        for candidate in preference:
            if candidate not in cls._providers:
                continue
            try:
                probe = cls.create_provider(candidate)
                if probe.initialize():
                    probe.shutdown()
                    return candidate
            except Exception:
                # Probe failures are expected on hosts lacking this backend.
                continue

        # Nothing usable was detected; CPU is assumed to always work.
        return ComputeBackend.CPU
|
||||
|
||||
|
||||
class ComputeManager:
    """High-level facade that owns and drives a single ComputeProvider."""

    def __init__(self, backend: Optional[ComputeBackend] = None):
        """
        Create a manager bound to one backend.

        Args:
            backend: Specific backend to use, or None to auto-detect the
                best registered backend via ComputeProviderFactory.
        """
        self.backend = backend or ComputeProviderFactory.auto_detect_backend()
        self.provider = ComputeProviderFactory.create_provider(self.backend)
        self.initialized = False  # set True only after provider.initialize() succeeds

    def initialize(self) -> bool:
        """Initialize the underlying provider; returns True on success."""
        try:
            self.initialized = self.provider.initialize()
        except Exception as e:
            print(f"❌ Compute Manager initialization failed: {e}")
            return False
        if self.initialized:
            print(f"✅ Compute Manager initialized with {self.backend.value} backend")
        else:
            print(f"❌ Failed to initialize {self.backend.value} backend")
        return self.initialized

    def shutdown(self) -> None:
        """Shut the provider down if it was initialized; no-op otherwise."""
        if not self.initialized:
            return
        self.provider.shutdown()
        self.initialized = False
        print(f"🔄 Compute Manager shutdown ({self.backend.value})")

    def get_provider(self) -> ComputeProvider:
        """Expose the wrapped compute provider for direct use."""
        return self.provider

    def get_backend_info(self) -> Dict[str, Any]:
        """Return a summary dict describing the backend and its devices."""
        if not self.initialized:
            # Before initialization no device queries are made.
            return {
                "backend": self.backend.value,
                "initialized": False,
                "device_count": 0,
                "available_devices": [],
            }
        return {
            "backend": self.backend.value,
            "initialized": True,
            "device_count": self.provider.get_device_count(),
            "available_devices": [
                device.name for device in self.provider.get_available_devices()
            ],
        }
|
||||
Reference in New Issue
Block a user