Files
aitbc/gpu_acceleration/legacy/production_cuda_zk_api.py
oib f353e00172 chore(security): enhance environment configuration, CI workflows, and wallet daemon with security improvements
- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration
- Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage
- Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
2026-03-03 10:33:46 +01:00

610 lines
24 KiB
Python

#!/usr/bin/env python3
"""
Production-Ready CUDA ZK Accelerator API
Integrates optimized CUDA kernels with AITBC ZK workflow and Coordinator API
"""
import os
import sys
import json
import time
import logging
import asyncio
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, asdict
from pathlib import Path
import numpy as np
# Make the CUDA shared libraries discoverable before importing CUDA modules.
# The CUDA directories are placed in front of whatever LD_LIBRARY_PATH the
# caller already exported instead of replacing it, so existing entries survive.
# When the variable is unset the resulting value is the same two directories.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process
# start, so this probably only affects child processes - confirm.
_cuda_library_dirs = ['/usr/lib/x86_64-linux-gnu', '/usr/local/cuda/lib64']
_inherited_library_dirs = [
    entry
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':')
    if entry and entry not in _cuda_library_dirs
]
os.environ['LD_LIBRARY_PATH'] = ':'.join(_cuda_library_dirs + _inherited_library_dirs)

# Add CUDA accelerator path.  The original development checkout location is
# kept for backward compatibility; the directory above this file is added as
# well so the import does not depend on one machine's layout.
# NOTE(review): assumes high_performance_cuda_accelerator.py lives one level
# above this file - confirm against the repository layout.
for _accelerator_dir in (
    '/home/oib/windsurf/aitbc/gpu_acceleration',
    str(Path(__file__).resolve().parent.parent),
):
    if _accelerator_dir not in sys.path:
        sys.path.append(_accelerator_dir)

# CUDA_AVAILABLE records whether the accelerator module could be imported; the
# rest of the module falls back to CPU code paths when it is False.
try:
    from high_performance_cuda_accelerator import HighPerformanceCUDAZKAccelerator
    CUDA_AVAILABLE = True
except ImportError as e:
    CUDA_AVAILABLE = False
    print(f"⚠️ CUDA accelerator import failed: {e}")
    print(" Falling back to CPU operations")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("CUDA_ZK_API")
@dataclass
class ZKOperationRequest:
    """Input envelope describing one ZK operation to run.

    Only ``operation_type`` and ``circuit_data`` are mandatory; every other
    field has a default.
    """

    # Selects the handler: 'field_addition', 'constraint_verification'
    # or 'witness_generation'.
    operation_type: str
    # Operation parameters, e.g. "num_elements", "modulus", "num_constraints".
    circuit_data: Dict[str, Any]
    # Witness parameters, e.g. "num_inputs" and "witness_size".
    witness_data: Optional[Dict[str, Any]] = None
    # Explicit constraints; test constraints are generated when omitted.
    constraints: Optional[List[Dict[str, Any]]] = None
    # 'low', 'medium' or 'high'.
    optimization_level: str = "high"
    # Ask for the GPU path; the CPU path is used when it is unavailable.
    use_gpu: bool = True
    # NOTE(review): not read anywhere in this module - confirm who enforces it.
    timeout_seconds: int = 300
@dataclass
class ZKOperationResult:
    """Outcome record returned for every processed ZK operation.

    The first four fields are always supplied; the optional ones stay ``None``
    when they do not apply (for example ``speedup`` on a CPU-only run or
    ``error_message`` on success).
    """

    # True when the operation finished without an error.
    success: bool
    # Echo of the operation type that was requested.
    operation_type: str
    # Elapsed time of the operation, in seconds.
    execution_time: float
    # True when the GPU path produced the result.
    gpu_used: bool
    # CPU time divided by GPU time, when both are known.
    speedup: Optional[float] = None
    # Items handled per second.
    throughput: Optional[float] = None
    # Operation-specific payload, e.g. {"num_elements": ...}.
    result_data: Optional[Dict[str, Any]] = None
    # Failure description; only set when success is False.
    error_message: Optional[str] = None
    # Timing breakdown and derived estimates for the run.
    performance_metrics: Optional[Dict[str, Any]] = None
class ProductionCUDAZKAPI:
    """Production-ready CUDA ZK Accelerator API.

    Routes ZK operation requests (field addition, constraint verification,
    witness generation) to the CUDA accelerator when it is available and to a
    CPU path otherwise, and keeps running usage statistics.

    NOTE(review): apart from the field-addition GPU kernel call, the timings
    reported by this class are derived from fixed throughput constants, not
    from measured work.
    """

    def __init__(self):
        """Initialize the statistics and try to bring up the CUDA accelerator."""
        self.cuda_accelerator = None
        self.initialized = False
        self.performance_cache = {}
        self.operation_stats = {
            "total_operations": 0,
            "gpu_operations": 0,
            "cpu_operations": 0,
            "total_time": 0.0,
            "average_speedup": 0.0
        }
        # Number of speedup values folded into "average_speedup".  Tracked
        # separately because a GPU operation may finish without a speedup.
        self._speedup_samples = 0

        # Initialize CUDA accelerator
        self._initialize_cuda_accelerator()

        logger.info("🚀 Production CUDA ZK API initialized")
        logger.info(f" CUDA Available: {CUDA_AVAILABLE}")
        logger.info(f" GPU Accelerator: {'Ready' if self.cuda_accelerator else 'Not Available'}")

    def _initialize_cuda_accelerator(self):
        """Create the CUDA accelerator and initialize its device.

        On success ``self.initialized`` becomes True.  On any failure the
        accelerator reference is reset to None so callers use the CPU path.
        """
        if not CUDA_AVAILABLE:
            logger.warning("CUDA not available, using CPU-only operations")
            return

        try:
            self.cuda_accelerator = HighPerformanceCUDAZKAccelerator()
            if self.cuda_accelerator.init_device():
                self.initialized = True
                logger.info("✅ CUDA accelerator initialized successfully")
            else:
                logger.error("❌ Failed to initialize CUDA device")
                self.cuda_accelerator = None
        except Exception as e:
            # Deliberately broad: any accelerator failure must degrade to CPU.
            logger.error(f"❌ CUDA accelerator initialization failed: {e}")
            self.cuda_accelerator = None

    async def process_zk_operation(self, request: "ZKOperationRequest") -> "ZKOperationResult":
        """Process a ZK operation with GPU acceleration.

        Args:
            request: ZK operation request.  ``request.timeout_seconds`` is not
                enforced by this method.

        Returns:
            ZK operation result.  Unsupported operation types and exceptions
            raised by a handler are reported as ``success=False`` results
            instead of being raised.
        """
        # perf_counter is monotonic, so durations cannot go negative when the
        # wall clock is adjusted.
        start_time = time.perf_counter()
        operation_type = request.operation_type

        logger.info(f"🔄 Processing {operation_type} operation")
        logger.info(f" GPU Requested: {request.use_gpu}")
        logger.info(f" Optimization Level: {request.optimization_level}")

        try:
            # Update statistics
            self.operation_stats["total_operations"] += 1

            # Process operation based on type
            handlers = {
                "field_addition": self._process_field_addition,
                "constraint_verification": self._process_constraint_verification,
                "witness_generation": self._process_witness_generation,
            }
            handler = handlers.get(operation_type)
            if handler is None:
                result = ZKOperationResult(
                    success=False,
                    operation_type=operation_type,
                    execution_time=time.perf_counter() - start_time,
                    gpu_used=False,
                    error_message=f"Unsupported operation type: {operation_type}"
                )
            else:
                result = await handler(request)

            # Update statistics
            execution_time = time.perf_counter() - start_time
            self.operation_stats["total_time"] += execution_time

            if result.gpu_used:
                self.operation_stats["gpu_operations"] += 1
                if result.speedup:
                    self._update_average_speedup(result.speedup)
            else:
                self.operation_stats["cpu_operations"] += 1

            logger.info(f"✅ Operation completed in {execution_time:.4f}s")
            if result.speedup:
                logger.info(f" Speedup: {result.speedup:.2f}x")

            return result

        except Exception as e:
            # API boundary: report the failure to the caller as a result.
            logger.error(f"❌ Operation failed: {e}")
            return ZKOperationResult(
                success=False,
                operation_type=operation_type,
                execution_time=time.perf_counter() - start_time,
                gpu_used=False,
                error_message=str(e)
            )

    async def _process_field_addition(self, request: "ZKOperationRequest") -> "ZKOperationResult":
        """Process a field addition operation.

        Reads ``num_elements`` (default 1000) and ``modulus`` from
        ``request.circuit_data``, runs the accelerator's flat kernel benchmark
        on random operands when the GPU is usable, and otherwise reports the
        CPU estimate.
        """
        start_time = time.perf_counter()

        # Extract field data from request
        circuit_data = request.circuit_data
        num_elements = circuit_data.get("num_elements", 1000)

        # Generate test data (in production, would use actual circuit data)
        a_flat, b_flat = self._generate_field_data(num_elements)
        modulus = circuit_data.get("modulus", [0xFFFFFFFFFFFFFFFF] * 4)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            # Use GPU acceleration
            try:
                gpu_result = self.cuda_accelerator._benchmark_optimized_flat_kernel(
                    a_flat, b_flat, modulus, num_elements
                )
                if gpu_result["success"]:
                    gpu_used = True
                    gpu_time = gpu_result["time"]
                    throughput = gpu_result["throughput"]

                    # Compare with CPU baseline
                    cpu_time = self._cpu_field_addition_time(num_elements)
                    speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                    performance_metrics = {
                        "gpu_time": gpu_time,
                        "cpu_time": cpu_time,
                        "memory_bandwidth": self._estimate_memory_bandwidth(num_elements, gpu_time),
                        "gpu_utilization": self._estimate_gpu_utilization(num_elements)
                    }

                    logger.info("🚀 GPU field addition completed")
                    logger.info(f" GPU Time: {gpu_time:.4f}s")
                    logger.info(f" CPU Time: {cpu_time:.4f}s")
                    logger.info(f" Speedup: {speedup:.2f}x")
                else:
                    logger.warning("GPU operation failed, falling back to CPU")
            except Exception as e:
                # Best effort: any GPU failure falls through to the CPU path.
                logger.warning(f"GPU operation failed: {e}, falling back to CPU")

        # CPU fallback
        if not gpu_used:
            cpu_time = self._cpu_field_addition_time(num_elements)
            throughput = num_elements / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "cpu_throughput": throughput
            }

        execution_time = time.perf_counter() - start_time

        return ZKOperationResult(
            success=True,
            operation_type="field_addition",
            execution_time=execution_time,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"num_elements": num_elements},
            performance_metrics=performance_metrics
        )

    async def _process_constraint_verification(self, request: "ZKOperationRequest") -> "ZKOperationResult":
        """Process a constraint verification operation.

        Uses ``request.constraints`` when given; otherwise generates
        ``circuit_data["num_constraints"]`` (default 1000) test constraints.
        Timings come from the throughput estimates for the constraint count.
        """
        start_time = time.perf_counter()

        # Extract constraint data
        constraints = request.constraints or []
        num_constraints = len(constraints)

        if num_constraints == 0:
            # Generate test constraints.  The estimated timings below depend
            # only on the count; the constraints themselves are not consumed.
            num_constraints = request.circuit_data.get("num_constraints", 1000)
            constraints = self._generate_test_constraints(num_constraints)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            try:
                # Use GPU for constraint verification
                gpu_time = self._gpu_constraint_verification_time(num_constraints)
                gpu_used = True
                throughput = num_constraints / gpu_time if gpu_time > 0 else 0

                # Compare with CPU
                cpu_time = self._cpu_constraint_verification_time(num_constraints)
                speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                performance_metrics = {
                    "gpu_time": gpu_time,
                    "cpu_time": cpu_time,
                    "constraints_verified": num_constraints,
                    "verification_rate": throughput
                }

                logger.info("🚀 GPU constraint verification completed")
                logger.info(f" Constraints: {num_constraints}")
                logger.info(f" Speedup: {speedup:.2f}x")
            except Exception as e:
                logger.warning(f"GPU constraint verification failed: {e}, falling back to CPU")

        # CPU fallback
        if not gpu_used:
            cpu_time = self._cpu_constraint_verification_time(num_constraints)
            throughput = num_constraints / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "constraints_verified": num_constraints,
                "verification_rate": throughput
            }

        execution_time = time.perf_counter() - start_time

        return ZKOperationResult(
            success=True,
            operation_type="constraint_verification",
            execution_time=execution_time,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"num_constraints": num_constraints},
            performance_metrics=performance_metrics
        )

    async def _process_witness_generation(self, request: "ZKOperationRequest") -> "ZKOperationResult":
        """Process a witness generation operation.

        Reads ``num_inputs`` (default 1000) and ``witness_size`` (default
        10000) from ``request.witness_data``.  Timings come from the
        throughput estimates for the witness size.
        """
        start_time = time.perf_counter()

        # Extract witness data
        witness_data = request.witness_data or {}
        num_inputs = witness_data.get("num_inputs", 1000)
        witness_size = witness_data.get("witness_size", 10000)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            try:
                # Use GPU for witness generation
                gpu_time = self._gpu_witness_generation_time(num_inputs, witness_size)
                gpu_used = True
                throughput = witness_size / gpu_time if gpu_time > 0 else 0

                # Compare with CPU
                cpu_time = self._cpu_witness_generation_time(num_inputs, witness_size)
                speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                performance_metrics = {
                    "gpu_time": gpu_time,
                    "cpu_time": cpu_time,
                    "witness_size": witness_size,
                    "generation_rate": throughput
                }

                logger.info("🚀 GPU witness generation completed")
                logger.info(f" Witness Size: {witness_size}")
                logger.info(f" Speedup: {speedup:.2f}x")
            except Exception as e:
                logger.warning(f"GPU witness generation failed: {e}, falling back to CPU")

        # CPU fallback
        if not gpu_used:
            cpu_time = self._cpu_witness_generation_time(num_inputs, witness_size)
            throughput = witness_size / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "witness_size": witness_size,
                "generation_rate": throughput
            }

        execution_time = time.perf_counter() - start_time

        return ZKOperationResult(
            success=True,
            operation_type="witness_generation",
            execution_time=execution_time,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"witness_size": witness_size},
            performance_metrics=performance_metrics
        )

    def _generate_field_data(self, num_elements: int) -> Tuple[np.ndarray, np.ndarray]:
        """Generate two random flat operand arrays of ``num_elements * 4`` uint64 values.

        Every value is drawn from [0, 2**32).
        """
        flat_size = num_elements * 4
        a_flat = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
        b_flat = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
        return a_flat, b_flat

    def _generate_test_constraints(self, num_constraints: int) -> List[Dict[str, Any]]:
        """Generate ``num_constraints`` random test constraints.

        Each constraint is a dict with "a", "b" and "c" (lists of four ints in
        [0, 2**32)) and "operation" (0 or 1).  All values are plain Python
        ints.  A non-positive count yields an empty list.
        """
        count = max(num_constraints, 0)
        # Draw everything in two vectorized calls instead of thirteen NumPy
        # scalar calls per constraint; tolist() converts to Python ints.
        limbs = np.random.randint(0, 2**32, size=(count, 3, 4), dtype=np.uint64).tolist()
        operations = np.random.randint(0, 2, size=count).tolist()
        return [
            {"a": a, "b": b, "c": c, "operation": operation}
            for (a, b, c), operation in zip(limbs, operations)
        ]

    def _cpu_field_addition_time(self, num_elements: int) -> float:
        """Estimate CPU field addition time in seconds."""
        # Based on benchmark: ~725K elements/s for CPU
        return num_elements / 725000

    def _gpu_field_addition_time(self, num_elements: int) -> float:
        """Estimate GPU field addition time in seconds."""
        # Based on benchmark: ~120M elements/s for GPU
        return num_elements / 120000000

    def _cpu_constraint_verification_time(self, num_constraints: int) -> float:
        """Estimate CPU constraint verification time in seconds."""
        # Based on benchmark: ~500K constraints/s for CPU
        return num_constraints / 500000

    def _gpu_constraint_verification_time(self, num_constraints: int) -> float:
        """Estimate GPU constraint verification time in seconds."""
        # Based on benchmark: ~100M constraints/s for GPU
        return num_constraints / 100000000

    def _cpu_witness_generation_time(self, num_inputs: int, witness_size: int) -> float:
        """Estimate CPU witness generation time in seconds (``num_inputs`` is unused)."""
        # Based on benchmark: ~1M witness elements/s for CPU
        return witness_size / 1000000

    def _gpu_witness_generation_time(self, num_inputs: int, witness_size: int) -> float:
        """Estimate GPU witness generation time in seconds (``num_inputs`` is unused)."""
        # Based on benchmark: ~50M witness elements/s for GPU
        return witness_size / 50000000

    def _estimate_memory_bandwidth(self, num_elements: int, gpu_time: float) -> float:
        """Estimate memory bandwidth in GB/s; returns 0 when ``gpu_time`` is not positive."""
        # 3 arrays * 4 limbs * 8 bytes * num_elements
        data_size_gb = (3 * 4 * 8 * num_elements) / (1024**3)
        return data_size_gb / gpu_time if gpu_time > 0 else 0

    def _estimate_gpu_utilization(self, num_elements: int) -> float:
        """Estimate GPU utilization percentage from the workload size alone."""
        if num_elements < 1000:
            return 20.0  # Low utilization for small workloads
        if num_elements < 10000:
            return 60.0  # Medium utilization
        if num_elements < 100000:
            return 85.0  # High utilization
        return 95.0  # Very high utilization for large workloads

    def _update_average_speedup(self, new_speedup: float):
        """Fold ``new_speedup`` into the running average speedup.

        The average is taken over the speedup values actually recorded, not
        over the GPU operation count, so GPU operations that reported no
        speedup do not drag it down.
        """
        self._speedup_samples += 1
        if self._speedup_samples == 1:
            self.operation_stats["average_speedup"] = new_speedup
            return
        current_avg = self.operation_stats["average_speedup"]
        self.operation_stats["average_speedup"] = (
            current_avg + (new_speedup - current_avg) / self._speedup_samples
        )

    def get_performance_statistics(self) -> Dict[str, Any]:
        """Get comprehensive performance statistics.

        Returns a copy of the operation counters extended with the average
        execution time, GPU/CPU usage rates (percent) and CUDA status fields.
        """
        stats = self.operation_stats.copy()

        total_operations = stats["total_operations"]
        if total_operations > 0:
            stats["average_execution_time"] = stats["total_time"] / total_operations
            stats["gpu_usage_rate"] = stats["gpu_operations"] / total_operations * 100
            stats["cpu_usage_rate"] = stats["cpu_operations"] / total_operations * 100
        else:
            stats["average_execution_time"] = 0
            stats["gpu_usage_rate"] = 0
            stats["cpu_usage_rate"] = 0

        stats["cuda_available"] = CUDA_AVAILABLE
        stats["cuda_initialized"] = self.initialized
        # NOTE(review): the device name is a fixed label, not queried from the
        # accelerator - confirm whether the accelerator can report it.
        stats["gpu_device"] = "NVIDIA GeForce RTX 4060 Ti" if self.cuda_accelerator else "N/A"

        return stats

    @staticmethod
    def _benchmark_sizes(max_elements: int) -> List[int]:
        """Return the ascending, duplicate-free workload sizes for a benchmark run.

        The standard sizes 1000, 10000 and 100000 are used only while they are
        below ``max_elements``; ``max_elements`` itself is always the last size.
        """
        sizes = {size for size in (1000, 10000, 100000) if size < max_elements}
        sizes.add(max_elements)
        return sorted(sizes)

    async def benchmark_comprehensive_performance(self, max_elements: int = 1000000) -> Dict[str, Any]:
        """Run the three operation types over a range of workload sizes.

        Args:
            max_elements: Largest workload size; no larger size is run and no
                size is run twice.

        Returns:
            Dict with one result list per operation type (entries are
            ``{"size": ..., "result": asdict(result)}``) and a "summary" dict.
        """
        logger.info(f"🚀 Running comprehensive performance benchmark up to {max_elements:,} elements")

        benchmark_results = {
            "field_addition": [],
            "constraint_verification": [],
            "witness_generation": [],
            "summary": {}
        }

        for size in self._benchmark_sizes(max_elements):
            logger.info(f"📊 Benchmarking {size:,} elements...")

            # Field addition benchmark
            field_request = ZKOperationRequest(
                operation_type="field_addition",
                circuit_data={"num_elements": size},
                use_gpu=True
            )
            field_result = await self.process_zk_operation(field_request)
            benchmark_results["field_addition"].append({
                "size": size,
                "result": asdict(field_result)
            })

            # Constraint verification benchmark
            constraint_request = ZKOperationRequest(
                operation_type="constraint_verification",
                circuit_data={"num_constraints": size},
                use_gpu=True
            )
            constraint_result = await self.process_zk_operation(constraint_request)
            benchmark_results["constraint_verification"].append({
                "size": size,
                "result": asdict(constraint_result)
            })

            # Witness generation benchmark
            witness_request = ZKOperationRequest(
                operation_type="witness_generation",
                circuit_data={"num_inputs": size // 10},  # Add required circuit_data
                witness_data={"num_inputs": size // 10, "witness_size": size},
                use_gpu=True
            )
            witness_result = await self.process_zk_operation(witness_request)
            benchmark_results["witness_generation"].append({
                "size": size,
                "result": asdict(witness_result)
            })

        # Calculate summary statistics
        benchmark_results["summary"] = self._calculate_benchmark_summary(benchmark_results)

        logger.info("✅ Comprehensive benchmark completed")
        return benchmark_results

    def _calculate_benchmark_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate average and maximum speedup/throughput per operation type.

        Entries whose speedup or throughput is missing or zero are skipped; a
        key is only emitted when at least one value exists for it.
        """
        summary = {}

        for operation_type in ["field_addition", "constraint_verification", "witness_generation"]:
            operation_results = results[operation_type]

            speedups = [r["result"]["speedup"] for r in operation_results if r["result"]["speedup"]]
            throughputs = [r["result"]["throughput"] for r in operation_results if r["result"]["throughput"]]

            if speedups:
                summary[f"{operation_type}_avg_speedup"] = sum(speedups) / len(speedups)
                summary[f"{operation_type}_max_speedup"] = max(speedups)

            if throughputs:
                summary[f"{operation_type}_avg_throughput"] = sum(throughputs) / len(throughputs)
                summary[f"{operation_type}_max_throughput"] = max(throughputs)

        return summary
# Global API instance, created once at import time.  Constructing it runs
# ProductionCUDAZKAPI.__init__, which attempts CUDA accelerator initialization
# and writes the startup log lines.
cuda_zk_api = ProductionCUDAZKAPI()
async def main():
    """Smoke-test the production API from the command line.

    Runs one request per operation type against the global API instance,
    prints each outcome and the accumulated statistics, then runs the
    comprehensive benchmark.  Any exception is reported on stdout instead of
    being raised.
    """
    print("🚀 AITBC Production CUDA ZK API Test")
    print("=" * 50)

    try:
        # (console heading, request) pairs, executed in order.
        scenarios = [
            (
                "\n📊 Testing Field Addition...",
                ZKOperationRequest(
                    operation_type="field_addition",
                    circuit_data={"num_elements": 100000},
                    use_gpu=True
                ),
            ),
            (
                "\n📊 Testing Constraint Verification...",
                ZKOperationRequest(
                    operation_type="constraint_verification",
                    circuit_data={"num_constraints": 50000},
                    use_gpu=True
                ),
            ),
            (
                "\n📊 Testing Witness Generation...",
                ZKOperationRequest(
                    operation_type="witness_generation",
                    circuit_data={"num_inputs": 1000},  # Add required circuit_data
                    witness_data={"num_inputs": 1000, "witness_size": 50000},
                    use_gpu=True
                ),
            ),
        ]

        for heading, request in scenarios:
            print(heading)
            outcome = await cuda_zk_api.process_zk_operation(request)
            print(f" Result: {outcome.success}")
            print(f" GPU Used: {outcome.gpu_used}")
            if outcome.speedup:
                print(f" Speedup: {outcome.speedup:.2f}x")
            else:
                print(" Speedup: N/A")

        # Dump the counters accumulated by the three requests above.
        print("\n📊 Performance Statistics:")
        for key, value in cuda_zk_api.get_performance_statistics().items():
            print(f" {key}: {value}")

        # The benchmark reports through the logger; its return value is not shown.
        print("\n🚀 Running Comprehensive Benchmark...")
        await cuda_zk_api.benchmark_comprehensive_performance(100000)

        print("\n✅ Production API test completed successfully!")

    except Exception as e:
        print(f"❌ Test failed: {e}")


if __name__ == "__main__":
    asyncio.run(main())