Remove legacy folder and rewire imports
Some checks failed

- Move marketplace_gpu_optimizer.py from legacy to parallel_processing
- Update coordinator-api imports to use new dev/gpu_acceleration location
- Remove legacy folder as code has been refactored
- Fix marketplace_performance.py imports for gpu_acceleration
This commit is contained in:
@@ -16,16 +16,16 @@ logger = logging.getLogger(__name__)
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../../gpu_acceleration"))
|
sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../../dev/gpu_acceleration"))
|
||||||
from marketplace_gpu_optimizer import MarketplaceGPUOptimizer
|
from parallel_processing.marketplace_gpu_optimizer import MarketplaceGPUOptimizer
|
||||||
|
|
||||||
from aitbc.gpu_acceleration.parallel_processing.distributed_framework import (
|
from dev.gpu_acceleration.parallel_processing.distributed_framework import (
|
||||||
DistributedProcessingCoordinator,
|
DistributedProcessingCoordinator,
|
||||||
DistributedTask,
|
DistributedTask,
|
||||||
)
|
)
|
||||||
from aitbc.gpu_acceleration.parallel_processing.marketplace_cache_optimizer import MarketplaceDataOptimizer
|
from dev.gpu_acceleration.parallel_processing.marketplace_cache_optimizer import MarketplaceDataOptimizer
|
||||||
from aitbc.gpu_acceleration.parallel_processing.marketplace_monitor import monitor as marketplace_monitor
|
from dev.gpu_acceleration.parallel_processing.marketplace_monitor import monitor as marketplace_monitor
|
||||||
from aitbc.gpu_acceleration.parallel_processing.marketplace_scaler import ResourceScaler
|
from dev.gpu_acceleration.parallel_processing.marketplace_scaler import ResourceScaler
|
||||||
|
|
||||||
router = APIRouter(prefix="/v1/marketplace/performance", tags=["marketplace-performance"])
|
router = APIRouter(prefix="/v1/marketplace/performance", tags=["marketplace-performance"])
|
||||||
|
|
||||||
|
|||||||
@@ -1,354 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
FastAPI Integration for Production CUDA ZK Accelerator
|
|
||||||
Provides REST API endpoints for GPU-accelerated ZK circuit operations
|
|
||||||
"""
|
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing import Dict, List, Optional, Any
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Add GPU acceleration path
|
|
||||||
sys.path.append('/home/oib/windsurf/aitbc/gpu_acceleration')
|
|
||||||
|
|
||||||
try:
|
|
||||||
from production_cuda_zk_api import ProductionCUDAZKAPI, ZKOperationRequest, ZKOperationResult
|
|
||||||
CUDA_AVAILABLE = True
|
|
||||||
except ImportError as e:
|
|
||||||
CUDA_AVAILABLE = False
|
|
||||||
print(f"⚠️ CUDA API import failed: {e}")
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
logger = logging.getLogger("CUDA_ZK_FASTAPI")
|
|
||||||
|
|
||||||
# Initialize FastAPI app
|
|
||||||
app = FastAPI(
|
|
||||||
title="AITBC CUDA ZK Acceleration API",
|
|
||||||
description="Production-ready GPU acceleration for zero-knowledge circuit operations",
|
|
||||||
version="1.0.0",
|
|
||||||
docs_url="/docs",
|
|
||||||
redoc_url="/redoc"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add CORS middleware
|
|
||||||
app.add_middleware(
|
|
||||||
CORSMiddleware,
|
|
||||||
allow_origins=["*"],
|
|
||||||
allow_credentials=True,
|
|
||||||
allow_methods=["*"],
|
|
||||||
allow_headers=["*"],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize CUDA API
|
|
||||||
cuda_api = ProductionCUDAZKAPI()
|
|
||||||
|
|
||||||
# Pydantic models for API
|
|
||||||
class FieldAdditionRequest(BaseModel):
    """Request body for the GPU-accelerated field-addition endpoint."""

    # Element count is capped at 10M to bound GPU memory usage.
    num_elements: int = Field(..., ge=1, le=10000000, description="Number of field elements")
    # Field modulus as four 64-bit limbs; default is an all-ones modulus.
    modulus: Optional[List[int]] = Field(default=[0xFFFFFFFFFFFFFFFF] * 4, description="Field modulus")
    optimization_level: str = Field(default="high", pattern="^(low|medium|high)$")
    use_gpu: bool = Field(default=True, description="Use GPU acceleration")
|
|
||||||
|
|
||||||
class ConstraintVerificationRequest(BaseModel):
    """Request body for the GPU-accelerated constraint-verification endpoint."""

    num_constraints: int = Field(..., ge=1, le=10000000, description="Number of constraints")
    # Optional explicit constraint payload; backend synthesizes data when None.
    constraints: Optional[List[Dict[str, Any]]] = Field(default=None, description="Constraint data")
    optimization_level: str = Field(default="high", pattern="^(low|medium|high)$")
    use_gpu: bool = Field(default=True, description="Use GPU acceleration")
|
|
||||||
|
|
||||||
class WitnessGenerationRequest(BaseModel):
    """Request body for the GPU-accelerated witness-generation endpoint."""

    num_inputs: int = Field(..., ge=1, le=1000000, description="Number of inputs")
    witness_size: int = Field(..., ge=1, le=10000000, description="Witness size")
    optimization_level: str = Field(default="high", pattern="^(low|medium|high)$")
    use_gpu: bool = Field(default=True, description="Use GPU acceleration")
|
|
||||||
|
|
||||||
class BenchmarkRequest(BaseModel):
    """Request body for the comprehensive benchmark endpoint."""

    # Lower bound of 1000 keeps the benchmark meaningful; 10M caps runtime.
    max_elements: int = Field(default=1000000, ge=1000, le=10000000, description="Maximum elements to benchmark")
|
|
||||||
|
|
||||||
class APIResponse(BaseModel):
|
|
||||||
success: bool
|
|
||||||
message: str
|
|
||||||
data: Optional[Dict[str, Any]] = None
|
|
||||||
execution_time: Optional[float] = None
|
|
||||||
gpu_used: Optional[bool] = None
|
|
||||||
speedup: Optional[float] = None
|
|
||||||
|
|
||||||
# Health check endpoint
|
|
||||||
@app.get("/health", response_model=Dict[str, Any])
async def health_check():
    """Report service liveness together with basic CUDA/GPU status.

    Returns a 500 error when the statistics backend itself fails.
    """
    try:
        stats = cuda_api.get_performance_statistics()
        return {
            "status": "healthy",
            "timestamp": time.time(),
            "cuda_available": stats["cuda_available"],
            "cuda_initialized": stats["cuda_initialized"],
            "gpu_device": stats["gpu_device"],
        }
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Performance statistics endpoint
|
|
||||||
@app.get("/stats", response_model=Dict[str, Any])
async def get_performance_stats():
    """Expose the CUDA backend's performance statistics verbatim."""
    try:
        return cuda_api.get_performance_statistics()
    except Exception as e:
        # Any backend failure surfaces as a 500 with the error text.
        logger.error(f"Failed to get stats: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Field addition endpoint
|
|
||||||
@app.post("/field-addition", response_model=APIResponse)
async def field_addition(request: FieldAdditionRequest):
    """Perform GPU-accelerated field addition.

    Wraps the request into a ZKOperationRequest, dispatches it to the CUDA
    backend, and maps the result onto the generic APIResponse envelope.

    Raises:
        HTTPException: 500 with the error text on any backend failure.

    Fix: removed the dead `start_time = time.time()` local — it was never
    read; execution time comes from the backend result instead.
    """
    try:
        zk_request = ZKOperationRequest(
            operation_type="field_addition",
            circuit_data={
                "num_elements": request.num_elements,
                "modulus": request.modulus,
            },
            optimization_level=request.optimization_level,
            use_gpu=request.use_gpu,
        )

        result = await cuda_api.process_zk_operation(zk_request)

        return APIResponse(
            success=result.success,
            message="Field addition completed successfully" if result.success else "Field addition failed",
            data=result.result_data,
            execution_time=result.execution_time,
            gpu_used=result.gpu_used,
            speedup=result.speedup,
        )

    except Exception as e:
        logger.error(f"Field addition failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Constraint verification endpoint
|
|
||||||
@app.post("/constraint-verification", response_model=APIResponse)
async def constraint_verification(request: ConstraintVerificationRequest):
    """Perform GPU-accelerated constraint verification.

    Raises:
        HTTPException: 500 with the error text on any backend failure.

    Fix: removed the dead `start_time = time.time()` local — it was never
    read; execution time comes from the backend result instead.
    """
    try:
        zk_request = ZKOperationRequest(
            operation_type="constraint_verification",
            circuit_data={"num_constraints": request.num_constraints},
            constraints=request.constraints,
            optimization_level=request.optimization_level,
            use_gpu=request.use_gpu,
        )

        result = await cuda_api.process_zk_operation(zk_request)

        return APIResponse(
            success=result.success,
            message="Constraint verification completed successfully" if result.success else "Constraint verification failed",
            data=result.result_data,
            execution_time=result.execution_time,
            gpu_used=result.gpu_used,
            speedup=result.speedup,
        )

    except Exception as e:
        logger.error(f"Constraint verification failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Witness generation endpoint
|
|
||||||
@app.post("/witness-generation", response_model=APIResponse)
async def witness_generation(request: WitnessGenerationRequest):
    """Perform GPU-accelerated witness generation.

    Raises:
        HTTPException: 500 with the error text on any backend failure.

    Fix: removed the dead `start_time = time.time()` local — it was never
    read; execution time comes from the backend result instead.
    """
    try:
        zk_request = ZKOperationRequest(
            operation_type="witness_generation",
            circuit_data={"num_inputs": request.num_inputs},
            witness_data={"num_inputs": request.num_inputs, "witness_size": request.witness_size},
            optimization_level=request.optimization_level,
            use_gpu=request.use_gpu,
        )

        result = await cuda_api.process_zk_operation(zk_request)

        return APIResponse(
            success=result.success,
            message="Witness generation completed successfully" if result.success else "Witness generation failed",
            data=result.result_data,
            execution_time=result.execution_time,
            gpu_used=result.gpu_used,
            speedup=result.speedup,
        )

    except Exception as e:
        logger.error(f"Witness generation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Comprehensive benchmark endpoint
|
|
||||||
@app.post("/benchmark", response_model=Dict[str, Any])
async def comprehensive_benchmark(request: BenchmarkRequest, background_tasks: BackgroundTasks):
    """Run the full CUDA benchmark suite up to ``request.max_elements`` elements.

    NOTE(review): ``background_tasks`` is accepted for interface compatibility
    but the benchmark is awaited inline, not scheduled as a background task.
    """
    try:
        logger.info(f"Starting comprehensive benchmark up to {request.max_elements:,} elements")

        outcome = await cuda_api.benchmark_comprehensive_performance(request.max_elements)

        return {
            "success": True,
            "message": "Comprehensive benchmark completed",
            "data": outcome,
            "timestamp": time.time(),
        }

    except Exception as e:
        logger.error(f"Benchmark failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Quick benchmark endpoint
|
|
||||||
@app.get("/quick-benchmark", response_model=Dict[str, Any])
async def quick_benchmark():
    """Run a fast smoke benchmark: 100K field additions and 50K constraint checks."""
    try:
        logger.info("Running quick benchmark")

        def summarize(res):
            # Collapse a backend result object into a JSON-friendly dict.
            return {
                "success": res.success,
                "execution_time": res.execution_time,
                "gpu_used": res.gpu_used,
                "speedup": res.speedup,
                "throughput": res.throughput,
            }

        # Field addition with 100K elements.
        field_result = await cuda_api.process_zk_operation(
            ZKOperationRequest(
                operation_type="field_addition",
                circuit_data={"num_elements": 100000},
                use_gpu=True,
            )
        )

        # Constraint verification with 50K constraints.
        constraint_result = await cuda_api.process_zk_operation(
            ZKOperationRequest(
                operation_type="constraint_verification",
                circuit_data={"num_constraints": 50000},
                use_gpu=True,
            )
        )

        return {
            "success": True,
            "message": "Quick benchmark completed",
            "data": {
                "field_addition": summarize(field_result),
                "constraint_verification": summarize(constraint_result),
            },
            "timestamp": time.time(),
        }

    except Exception as e:
        logger.error(f"Quick benchmark failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# GPU information endpoint
|
|
||||||
@app.get("/gpu-info", response_model=Dict[str, Any])
async def get_gpu_info():
    """Summarize GPU capabilities and per-backend operation counters."""
    try:
        stats = cuda_api.get_performance_statistics()

        # Required counters first; derived averages fall back to 0 when the
        # backend has not computed them yet.
        info = {
            key: stats[key]
            for key in (
                "cuda_available",
                "cuda_initialized",
                "gpu_device",
                "total_operations",
                "gpu_operations",
                "cpu_operations",
            )
        }
        for key in ("gpu_usage_rate", "average_speedup", "average_execution_time"):
            info[key] = stats.get(key, 0)
        return info

    except Exception as e:
        logger.error(f"Failed to get GPU info: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Reset statistics endpoint
|
|
||||||
@app.post("/reset-stats", response_model=Dict[str, Any])
async def reset_statistics():
    """Reset the CUDA backend's performance counters.

    Raises:
        HTTPException: 500 with the error text on any backend failure.

    Fix: the previous ``response_model=Dict[str, str]`` contradicted the
    actual payload, whose "success" value is a bool; ``Dict[str, Any]`` lets
    the response validate without coercing the bool into a string.
    """
    try:
        # Assign a fresh dict rather than mutating in place, so a partially
        # reset counter set can never be observed.
        cuda_api.operation_stats = {
            "total_operations": 0,
            "gpu_operations": 0,
            "cpu_operations": 0,
            "total_time": 0.0,
            "average_speedup": 0.0,
        }

        return {"success": True, "message": "Statistics reset successfully"}

    except Exception as e:
        logger.error(f"Failed to reset stats: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
# Root endpoint
|
|
||||||
@app.get("/", response_model=Dict[str, Any])
async def root():
    """Describe the API: name, version, and a map of every exposed route."""
    endpoint_map = {
        "health": "/health",
        "stats": "/stats",
        "gpu_info": "/gpu-info",
        "field_addition": "/field-addition",
        "constraint_verification": "/constraint-verification",
        "witness_generation": "/witness-generation",
        "quick_benchmark": "/quick-benchmark",
        "comprehensive_benchmark": "/benchmark",
        "docs": "/docs",
        "redoc": "/redoc",
    }
    return {
        "name": "AITBC CUDA ZK Acceleration API",
        "version": "1.0.0",
        "description": "Production-ready GPU acceleration for zero-knowledge circuit operations",
        "endpoints": endpoint_map,
        "cuda_available": CUDA_AVAILABLE,
        "timestamp": time.time(),
    }
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import uvicorn

    divider = "=" * 50
    print("🚀 Starting AITBC CUDA ZK Acceleration API Server")
    print(divider)
    print(f" CUDA Available: {CUDA_AVAILABLE}")
    print(" API Documentation: http://localhost:8001/docs")
    print(" ReDoc Documentation: http://localhost:8001/redoc")
    print(divider)

    # Dev-style server: autoreload enabled, bound to all interfaces on 8001.
    uvicorn.run(
        "fastapi_cuda_zk_api:app",
        host="0.0.0.0",
        port=8001,
        reload=True,
        log_level="info",
    )
|
|
||||||
@@ -1,453 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
High-Performance CUDA ZK Accelerator with Optimized Kernels
|
|
||||||
Implements optimized CUDA kernels with memory coalescing, vectorization, and shared memory
|
|
||||||
"""
|
|
||||||
|
|
||||||
import ctypes
|
|
||||||
import numpy as np
|
|
||||||
from typing import List, Tuple, Optional
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Optimized field element structure for flat array access
|
|
||||||
class OptimizedFieldElement(ctypes.Structure):
    """256-bit field element stored as four contiguous 64-bit limbs."""

    _fields_ = [("limbs", ctypes.c_uint64 * 4)]
|
|
||||||
|
|
||||||
class HighPerformanceCUDAZKAccelerator:
|
|
||||||
"""High-performance Python interface for optimized CUDA ZK operations"""
|
|
||||||
|
|
||||||
def __init__(self, lib_path: str = None):
|
|
||||||
"""
|
|
||||||
Initialize high-performance CUDA accelerator
|
|
||||||
|
|
||||||
Args:
|
|
||||||
lib_path: Path to compiled optimized CUDA library (.so file)
|
|
||||||
"""
|
|
||||||
self.lib_path = lib_path or self._find_optimized_cuda_lib()
|
|
||||||
self.lib = None
|
|
||||||
self.initialized = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.lib = ctypes.CDLL(self.lib_path)
|
|
||||||
self._setup_function_signatures()
|
|
||||||
self.initialized = True
|
|
||||||
print(f"✅ High-Performance CUDA ZK Accelerator initialized: {self.lib_path}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Failed to initialize CUDA accelerator: {e}")
|
|
||||||
self.initialized = False
|
|
||||||
|
|
||||||
def _find_optimized_cuda_lib(self) -> str:
|
|
||||||
"""Find the compiled optimized CUDA library"""
|
|
||||||
possible_paths = [
|
|
||||||
"./liboptimized_field_operations.so",
|
|
||||||
"./optimized_field_operations.so",
|
|
||||||
"../liboptimized_field_operations.so",
|
|
||||||
"../../liboptimized_field_operations.so",
|
|
||||||
"/usr/local/lib/liboptimized_field_operations.so"
|
|
||||||
]
|
|
||||||
|
|
||||||
for path in possible_paths:
|
|
||||||
if os.path.exists(path):
|
|
||||||
return path
|
|
||||||
|
|
||||||
raise FileNotFoundError("Optimized CUDA library not found. Please compile optimized_field_operations.cu first.")
|
|
||||||
|
|
||||||
def _setup_function_signatures(self):
|
|
||||||
"""Setup function signatures for optimized CUDA library functions"""
|
|
||||||
if not self.lib:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Initialize optimized CUDA device
|
|
||||||
self.lib.init_optimized_cuda_device.argtypes = []
|
|
||||||
self.lib.init_optimized_cuda_device.restype = ctypes.c_int
|
|
||||||
|
|
||||||
# Optimized field addition with flat arrays
|
|
||||||
self.lib.gpu_optimized_field_addition.argtypes = [
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
ctypes.c_int
|
|
||||||
]
|
|
||||||
self.lib.gpu_optimized_field_addition.restype = ctypes.c_int
|
|
||||||
|
|
||||||
# Vectorized field addition
|
|
||||||
self.lib.gpu_vectorized_field_addition.argtypes = [
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"), # field_vector_t
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
ctypes.c_int
|
|
||||||
]
|
|
||||||
self.lib.gpu_vectorized_field_addition.restype = ctypes.c_int
|
|
||||||
|
|
||||||
# Shared memory field addition
|
|
||||||
self.lib.gpu_shared_memory_field_addition.argtypes = [
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
np.ctypeslib.ndpointer(ctypes.c_uint64, flags="C_CONTIGUOUS"),
|
|
||||||
ctypes.c_int
|
|
||||||
]
|
|
||||||
self.lib.gpu_shared_memory_field_addition.restype = ctypes.c_int
|
|
||||||
|
|
||||||
def init_device(self) -> bool:
|
|
||||||
"""Initialize optimized CUDA device and check capabilities"""
|
|
||||||
if not self.initialized:
|
|
||||||
print("❌ CUDA accelerator not initialized")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = self.lib.init_optimized_cuda_device()
|
|
||||||
if result == 0:
|
|
||||||
print("✅ Optimized CUDA device initialized successfully")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(f"❌ CUDA device initialization failed: {result}")
|
|
||||||
return False
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ CUDA device initialization error: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def benchmark_optimized_kernels(self, max_elements: int = 10000000) -> dict:
|
|
||||||
"""
|
|
||||||
Benchmark all optimized CUDA kernels and compare performance
|
|
||||||
|
|
||||||
Args:
|
|
||||||
max_elements: Maximum number of elements to test
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Comprehensive performance benchmark results
|
|
||||||
"""
|
|
||||||
if not self.initialized:
|
|
||||||
return {"error": "CUDA accelerator not initialized"}
|
|
||||||
|
|
||||||
print(f"🚀 High-Performance CUDA Kernel Benchmark (up to {max_elements:,} elements)")
|
|
||||||
print("=" * 80)
|
|
||||||
|
|
||||||
# Test different dataset sizes
|
|
||||||
test_sizes = [
|
|
||||||
1000, # 1K elements
|
|
||||||
10000, # 10K elements
|
|
||||||
100000, # 100K elements
|
|
||||||
1000000, # 1M elements
|
|
||||||
5000000, # 5M elements
|
|
||||||
10000000, # 10M elements
|
|
||||||
]
|
|
||||||
|
|
||||||
results = {
|
|
||||||
"test_sizes": [],
|
|
||||||
"optimized_flat": [],
|
|
||||||
"vectorized": [],
|
|
||||||
"shared_memory": [],
|
|
||||||
"cpu_baseline": [],
|
|
||||||
"performance_summary": {}
|
|
||||||
}
|
|
||||||
|
|
||||||
for size in test_sizes:
|
|
||||||
if size > max_elements:
|
|
||||||
break
|
|
||||||
|
|
||||||
print(f"\n📊 Benchmarking {size:,} elements...")
|
|
||||||
|
|
||||||
# Generate test data as flat arrays for optimal memory access
|
|
||||||
a_flat, b_flat = self._generate_flat_test_data(size)
|
|
||||||
|
|
||||||
# bn128 field modulus (simplified)
|
|
||||||
modulus = [0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF]
|
|
||||||
|
|
||||||
# Benchmark optimized flat array kernel
|
|
||||||
flat_result = self._benchmark_optimized_flat_kernel(a_flat, b_flat, modulus, size)
|
|
||||||
|
|
||||||
# Benchmark vectorized kernel
|
|
||||||
vec_result = self._benchmark_vectorized_kernel(a_flat, b_flat, modulus, size)
|
|
||||||
|
|
||||||
# Benchmark shared memory kernel
|
|
||||||
shared_result = self._benchmark_shared_memory_kernel(a_flat, b_flat, modulus, size)
|
|
||||||
|
|
||||||
# Benchmark CPU baseline
|
|
||||||
cpu_result = self._benchmark_cpu_baseline(a_flat, b_flat, modulus, size)
|
|
||||||
|
|
||||||
# Store results
|
|
||||||
results["test_sizes"].append(size)
|
|
||||||
results["optimized_flat"].append(flat_result)
|
|
||||||
results["vectorized"].append(vec_result)
|
|
||||||
results["shared_memory"].append(shared_result)
|
|
||||||
results["cpu_baseline"].append(cpu_result)
|
|
||||||
|
|
||||||
# Print comparison
|
|
||||||
print(f" Optimized Flat: {flat_result['time']:.4f}s, {flat_result['throughput']:.0f} elem/s")
|
|
||||||
print(f" Vectorized: {vec_result['time']:.4f}s, {vec_result['throughput']:.0f} elem/s")
|
|
||||||
print(f" Shared Memory: {shared_result['time']:.4f}s, {shared_result['throughput']:.0f} elem/s")
|
|
||||||
print(f" CPU Baseline: {cpu_result['time']:.4f}s, {cpu_result['throughput']:.0f} elem/s")
|
|
||||||
|
|
||||||
# Calculate speedups
|
|
||||||
flat_speedup = cpu_result['time'] / flat_result['time'] if flat_result['time'] > 0 else 0
|
|
||||||
vec_speedup = cpu_result['time'] / vec_result['time'] if vec_result['time'] > 0 else 0
|
|
||||||
shared_speedup = cpu_result['time'] / shared_result['time'] if shared_result['time'] > 0 else 0
|
|
||||||
|
|
||||||
print(f" Speedups - Flat: {flat_speedup:.2f}x, Vec: {vec_speedup:.2f}x, Shared: {shared_speedup:.2f}x")
|
|
||||||
|
|
||||||
# Calculate performance summary
|
|
||||||
results["performance_summary"] = self._calculate_performance_summary(results)
|
|
||||||
|
|
||||||
# Print final summary
|
|
||||||
self._print_performance_summary(results["performance_summary"])
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
def _benchmark_optimized_flat_kernel(self, a_flat: np.ndarray, b_flat: np.ndarray,
|
|
||||||
modulus: List[int], num_elements: int) -> dict:
|
|
||||||
"""Benchmark optimized flat array kernel"""
|
|
||||||
try:
|
|
||||||
result_flat = np.zeros_like(a_flat)
|
|
||||||
modulus_array = np.array(modulus, dtype=np.uint64)
|
|
||||||
|
|
||||||
# Multiple runs for consistency
|
|
||||||
times = []
|
|
||||||
for run in range(3):
|
|
||||||
start_time = time.time()
|
|
||||||
success = self.lib.gpu_optimized_field_addition(
|
|
||||||
a_flat, b_flat, result_flat, modulus_array, num_elements
|
|
||||||
)
|
|
||||||
run_time = time.time() - start_time
|
|
||||||
|
|
||||||
if success == 0: # Success
|
|
||||||
times.append(run_time)
|
|
||||||
|
|
||||||
if not times:
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
avg_time = sum(times) / len(times)
|
|
||||||
throughput = num_elements / avg_time if avg_time > 0 else 0
|
|
||||||
|
|
||||||
return {"time": avg_time, "throughput": throughput, "success": True}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ Optimized flat kernel error: {e}")
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
def _benchmark_vectorized_kernel(self, a_flat: np.ndarray, b_flat: np.ndarray,
|
|
||||||
modulus: List[int], num_elements: int) -> dict:
|
|
||||||
"""Benchmark vectorized kernel"""
|
|
||||||
try:
|
|
||||||
# Convert flat arrays to vectorized format (uint4)
|
|
||||||
# For simplicity, we'll reuse the flat array kernel as vectorized
|
|
||||||
# In practice, would convert to proper vector format
|
|
||||||
result_flat = np.zeros_like(a_flat)
|
|
||||||
modulus_array = np.array(modulus, dtype=np.uint64)
|
|
||||||
|
|
||||||
times = []
|
|
||||||
for run in range(3):
|
|
||||||
start_time = time.time()
|
|
||||||
success = self.lib.gpu_vectorized_field_addition(
|
|
||||||
a_flat, b_flat, result_flat, modulus_array, num_elements
|
|
||||||
)
|
|
||||||
run_time = time.time() - start_time
|
|
||||||
|
|
||||||
if success == 0:
|
|
||||||
times.append(run_time)
|
|
||||||
|
|
||||||
if not times:
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
avg_time = sum(times) / len(times)
|
|
||||||
throughput = num_elements / avg_time if avg_time > 0 else 0
|
|
||||||
|
|
||||||
return {"time": avg_time, "throughput": throughput, "success": True}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ Vectorized kernel error: {e}")
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
def _benchmark_shared_memory_kernel(self, a_flat: np.ndarray, b_flat: np.ndarray,
|
|
||||||
modulus: List[int], num_elements: int) -> dict:
|
|
||||||
"""Benchmark shared memory kernel"""
|
|
||||||
try:
|
|
||||||
result_flat = np.zeros_like(a_flat)
|
|
||||||
modulus_array = np.array(modulus, dtype=np.uint64)
|
|
||||||
|
|
||||||
times = []
|
|
||||||
for run in range(3):
|
|
||||||
start_time = time.time()
|
|
||||||
success = self.lib.gpu_shared_memory_field_addition(
|
|
||||||
a_flat, b_flat, result_flat, modulus_array, num_elements
|
|
||||||
)
|
|
||||||
run_time = time.time() - start_time
|
|
||||||
|
|
||||||
if success == 0:
|
|
||||||
times.append(run_time)
|
|
||||||
|
|
||||||
if not times:
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
avg_time = sum(times) / len(times)
|
|
||||||
throughput = num_elements / avg_time if avg_time > 0 else 0
|
|
||||||
|
|
||||||
return {"time": avg_time, "throughput": throughput, "success": True}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ Shared memory kernel error: {e}")
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
def _benchmark_cpu_baseline(self, a_flat: np.ndarray, b_flat: np.ndarray,
|
|
||||||
modulus: List[int], num_elements: int) -> dict:
|
|
||||||
"""Benchmark CPU baseline for comparison"""
|
|
||||||
try:
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
# Simple CPU field addition
|
|
||||||
result_flat = np.zeros_like(a_flat)
|
|
||||||
for i in range(num_elements):
|
|
||||||
base_idx = i * 4
|
|
||||||
for j in range(4):
|
|
||||||
result_flat[base_idx + j] = (a_flat[base_idx + j] + b_flat[base_idx + j]) % modulus[j]
|
|
||||||
|
|
||||||
cpu_time = time.time() - start_time
|
|
||||||
throughput = num_elements / cpu_time if cpu_time > 0 else 0
|
|
||||||
|
|
||||||
return {"time": cpu_time, "throughput": throughput, "success": True}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ CPU baseline error: {e}")
|
|
||||||
return {"time": float('inf'), "throughput": 0, "success": False}
|
|
||||||
|
|
||||||
def _generate_flat_test_data(self, num_elements: int) -> Tuple[np.ndarray, np.ndarray]:
|
|
||||||
"""Generate flat array test data for optimal memory access"""
|
|
||||||
# Generate flat arrays (num_elements * 4 limbs)
|
|
||||||
flat_size = num_elements * 4
|
|
||||||
|
|
||||||
# Use numpy for fast generation
|
|
||||||
a_flat = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
|
|
||||||
b_flat = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
|
|
||||||
|
|
||||||
return a_flat, b_flat
|
|
||||||
|
|
||||||
def _calculate_performance_summary(self, results: dict) -> dict:
|
|
||||||
"""Calculate performance summary statistics"""
|
|
||||||
summary = {}
|
|
||||||
|
|
||||||
# Find best performing kernel for each size
|
|
||||||
best_speedups = []
|
|
||||||
best_throughputs = []
|
|
||||||
|
|
||||||
for i, size in enumerate(results["test_sizes"]):
|
|
||||||
cpu_time = results["cpu_baseline"][i]["time"]
|
|
||||||
|
|
||||||
# Calculate speedups
|
|
||||||
flat_speedup = cpu_time / results["optimized_flat"][i]["time"] if results["optimized_flat"][i]["time"] > 0 else 0
|
|
||||||
vec_speedup = cpu_time / results["vectorized"][i]["time"] if results["vectorized"][i]["time"] > 0 else 0
|
|
||||||
shared_speedup = cpu_time / results["shared_memory"][i]["time"] if results["shared_memory"][i]["time"] > 0 else 0
|
|
||||||
|
|
||||||
best_speedup = max(flat_speedup, vec_speedup, shared_speedup)
|
|
||||||
best_speedups.append(best_speedup)
|
|
||||||
|
|
||||||
# Find best throughput
|
|
||||||
best_throughput = max(
|
|
||||||
results["optimized_flat"][i]["throughput"],
|
|
||||||
results["vectorized"][i]["throughput"],
|
|
||||||
results["shared_memory"][i]["throughput"]
|
|
||||||
)
|
|
||||||
best_throughputs.append(best_throughput)
|
|
||||||
|
|
||||||
if best_speedups:
|
|
||||||
summary["best_speedup"] = max(best_speedups)
|
|
||||||
summary["average_speedup"] = sum(best_speedups) / len(best_speedups)
|
|
||||||
summary["best_speedup_size"] = results["test_sizes"][best_speedups.index(max(best_speedups))]
|
|
||||||
|
|
||||||
if best_throughputs:
|
|
||||||
summary["best_throughput"] = max(best_throughputs)
|
|
||||||
summary["average_throughput"] = sum(best_throughputs) / len(best_throughputs)
|
|
||||||
summary["best_throughput_size"] = results["test_sizes"][best_throughputs.index(max(best_throughputs))]
|
|
||||||
|
|
||||||
return summary
|
|
||||||
|
|
||||||
def _print_performance_summary(self, summary: dict):
    """Print a human-readable report for a benchmark summary dict."""
    print(f"\n🎯 High-Performance CUDA Summary:")
    print("=" * 50)

    if "best_speedup" in summary:
        print(f" Best Speedup: {summary['best_speedup']:.2f}x at {summary.get('best_speedup_size', 'N/A'):,} elements")
        print(f" Average Speedup: {summary['average_speedup']:.2f}x across all tests")

    if "best_throughput" in summary:
        print(f" Best Throughput: {summary['best_throughput']:.0f} elements/s at {summary.get('best_throughput_size', 'N/A'):,} elements")
        print(f" Average Throughput: {summary['average_throughput']:.0f} elements/s")

    # Map the best speedup onto a qualitative rating; the first matching
    # threshold wins, mirroring the original if/elif chain.
    best = summary.get("best_speedup", 0)
    if best > 5:
        verdict = " 🚀 Performance: EXCELLENT - Significant GPU acceleration achieved"
    elif best > 2:
        verdict = " ✅ Performance: GOOD - Measurable GPU acceleration achieved"
    elif best > 1:
        verdict = " ⚠️ Performance: MODERATE - Limited GPU acceleration"
    else:
        verdict = " ❌ Performance: POOR - No significant GPU acceleration"
    print(verdict)
|
|
||||||
|
|
||||||
def analyze_memory_bandwidth(self, num_elements: int = 1000000) -> dict:
    """Measure effective memory bandwidth for each kernel variant.

    Runs the flat, vectorized and shared-memory kernels on freshly
    generated test data and converts each runtime into GB/s based on the
    total bytes moved (3 arrays, 4 limbs, 8 bytes per limb).
    """
    print(f"🔍 Analyzing Memory Bandwidth Performance ({num_elements:,} elements)...")

    lhs, rhs = self._generate_flat_test_data(num_elements)
    modulus = [0xFFFFFFFFFFFFFFFF] * 4

    # Run the three kernel variants in the same order as before.
    flat_run = self._benchmark_optimized_flat_kernel(lhs, rhs, modulus, num_elements)
    vector_run = self._benchmark_vectorized_kernel(lhs, rhs, modulus, num_elements)
    shared_run = self._benchmark_shared_memory_kernel(lhs, rhs, modulus, num_elements)

    # Total traffic: 3 arrays * 4 limbs * 8 bytes per element.
    total_bytes = num_elements * 4 * 8 * 3

    def _bandwidth(elapsed: float) -> float:
        # GB/s; zero when the kernel did not produce a usable timing.
        return total_bytes / (elapsed * 1024**3) if elapsed > 0 else 0

    analysis = {
        "data_size_gb": total_bytes / (1024**3),
        "flat_bandwidth_gb_s": _bandwidth(flat_run["time"]),
        "vectorized_bandwidth_gb_s": _bandwidth(vector_run["time"]),
        "shared_bandwidth_gb_s": _bandwidth(shared_run["time"]),
    }

    print(f" Data Size: {analysis['data_size_gb']:.2f} GB")
    print(f" Flat Kernel: {analysis['flat_bandwidth_gb_s']:.2f} GB/s")
    print(f" Vectorized Kernel: {analysis['vectorized_bandwidth_gb_s']:.2f} GB/s")
    print(f" Shared Memory Kernel: {analysis['shared_bandwidth_gb_s']:.2f} GB/s")

    return analysis
|
|
||||||
|
|
||||||
def main():
    """Entry point: exercise the high-performance CUDA ZK accelerator."""
    print("🚀 AITBC High-Performance CUDA ZK Accelerator Test")
    print("=" * 60)

    try:
        accelerator = HighPerformanceCUDAZKAccelerator()

        # Bail out early if construction or device setup failed.
        if not accelerator.initialized:
            print("❌ Failed to initialize CUDA accelerator")
            return
        if not accelerator.init_device():
            return

        # Full kernel benchmark followed by a bandwidth analysis.
        results = accelerator.benchmark_optimized_kernels(10000000)
        bandwidth_analysis = accelerator.analyze_memory_bandwidth(1000000)

        print("\n✅ High-Performance CUDA acceleration test completed!")

        if results.get("performance_summary", {}).get("best_speedup", 0) > 1:
            print(f"🚀 Optimization successful: {results['performance_summary']['best_speedup']:.2f}x speedup achieved")
        else:
            print("⚠️ Further optimization needed")

    except Exception as e:
        print(f"❌ Test failed: {e}")


if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,609 +0,0 @@
|
|||||||
#!/usr/bin/env python3
"""
Production-Ready CUDA ZK Accelerator API
Integrates optimized CUDA kernels with AITBC ZK workflow and Coordinator API
"""

import asyncio
import json
import logging
import os
import sys
import time
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any

import numpy as np

# Configure CUDA library paths before importing CUDA modules.
# NOTE(review): this clobbers any pre-existing LD_LIBRARY_PATH and the
# dynamic loader may not re-read it mid-process -- confirm it takes effect.
os.environ['LD_LIBRARY_PATH'] = '/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64'

# Add CUDA accelerator path so the optimized kernels can be imported.
# NOTE(review): absolute user-specific path; should come from configuration.
sys.path.append('/home/oib/windsurf/aitbc/gpu_acceleration')

try:
    from high_performance_cuda_accelerator import HighPerformanceCUDAZKAccelerator
    CUDA_AVAILABLE = True
except ImportError as e:
    # Degrade gracefully: the API still works using CPU estimates only.
    CUDA_AVAILABLE = False
    print(f"⚠️ CUDA accelerator import failed: {e}")
    print(" Falling back to CPU operations")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("CUDA_ZK_API")
|
|
||||||
|
|
||||||
@dataclass
class ZKOperationRequest:
    """Request structure for ZK operations."""
    # One of: 'field_addition', 'constraint_verification', 'witness_generation'
    operation_type: str
    circuit_data: Dict[str, Any]
    witness_data: Optional[Dict[str, Any]] = None
    constraints: Optional[List[Dict[str, Any]]] = None
    # 'low', 'medium' or 'high'
    optimization_level: str = "high"
    use_gpu: bool = True
    timeout_seconds: int = 300
|
|
||||||
|
|
||||||
@dataclass
class ZKOperationResult:
    """Result structure for ZK operations."""
    success: bool
    operation_type: str
    # Wall-clock seconds for the whole operation.
    execution_time: float
    gpu_used: bool
    # CPU time / GPU time when the GPU path ran; None otherwise.
    speedup: Optional[float] = None
    throughput: Optional[float] = None
    result_data: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None
    performance_metrics: Optional[Dict[str, Any]] = None
|
|
||||||
|
|
||||||
class ProductionCUDAZKAPI:
    """Production-ready CUDA ZK Accelerator API.

    Routes ZK operation requests to the GPU accelerator when available,
    falls back to CPU timing estimates otherwise, and keeps running usage
    statistics across all processed operations.
    """

    def __init__(self):
        """Initialize the API and attempt to bring up the CUDA accelerator."""
        self.cuda_accelerator = None
        self.initialized = False
        self.performance_cache = {}
        # Running counters updated by process_zk_operation().
        self.operation_stats = {
            "total_operations": 0,
            "gpu_operations": 0,
            "cpu_operations": 0,
            "total_time": 0.0,
            "average_speedup": 0.0,
        }

        self._initialize_cuda_accelerator()

        logger.info("🚀 Production CUDA ZK API initialized")
        logger.info(f" CUDA Available: {CUDA_AVAILABLE}")
        logger.info(f" GPU Accelerator: {'Ready' if self.cuda_accelerator else 'Not Available'}")

    def _initialize_cuda_accelerator(self):
        """Create and initialize the CUDA accelerator if the import succeeded."""
        if not CUDA_AVAILABLE:
            logger.warning("CUDA not available, using CPU-only operations")
            return

        try:
            self.cuda_accelerator = HighPerformanceCUDAZKAccelerator()
            if self.cuda_accelerator.init_device():
                self.initialized = True
                logger.info("✅ CUDA accelerator initialized successfully")
            else:
                logger.error("❌ Failed to initialize CUDA device")
                self.cuda_accelerator = None
        except Exception as e:
            logger.error(f"❌ CUDA accelerator initialization failed: {e}")
            self.cuda_accelerator = None

    async def process_zk_operation(self, request: ZKOperationRequest) -> ZKOperationResult:
        """
        Process a ZK operation with GPU acceleration.

        Args:
            request: ZK operation request

        Returns:
            ZK operation result
        """
        started = time.time()
        op = request.operation_type

        logger.info(f"🔄 Processing {op} operation")
        logger.info(f" GPU Requested: {request.use_gpu}")
        logger.info(f" Optimization Level: {request.optimization_level}")

        try:
            self.operation_stats["total_operations"] += 1

            # Dispatch table instead of an if/elif chain.
            handlers = {
                "field_addition": self._process_field_addition,
                "constraint_verification": self._process_constraint_verification,
                "witness_generation": self._process_witness_generation,
            }
            handler = handlers.get(op)
            if handler is not None:
                result = await handler(request)
            else:
                result = ZKOperationResult(
                    success=False,
                    operation_type=op,
                    execution_time=time.time() - started,
                    gpu_used=False,
                    error_message=f"Unsupported operation type: {op}",
                )

            elapsed = time.time() - started
            self.operation_stats["total_time"] += elapsed

            if result.gpu_used:
                self.operation_stats["gpu_operations"] += 1
                if result.speedup:
                    self._update_average_speedup(result.speedup)
            else:
                self.operation_stats["cpu_operations"] += 1

            logger.info(f"✅ Operation completed in {elapsed:.4f}s")
            if result.speedup:
                logger.info(f" Speedup: {result.speedup:.2f}x")

            return result

        except Exception as e:
            logger.error(f"❌ Operation failed: {e}")
            return ZKOperationResult(
                success=False,
                operation_type=op,
                execution_time=time.time() - started,
                gpu_used=False,
                error_message=str(e),
            )

    async def _process_field_addition(self, request: ZKOperationRequest) -> ZKOperationResult:
        """Run (or estimate) a batched field-addition workload."""
        started = time.time()

        circuit_data = request.circuit_data
        num_elements = circuit_data.get("num_elements", 1000)

        # Generate test data (in production, would use actual circuit data).
        a_flat, b_flat = self._generate_field_data(num_elements)
        modulus = circuit_data.get("modulus", [0xFFFFFFFFFFFFFFFF] * 4)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            try:
                gpu_result = self.cuda_accelerator._benchmark_optimized_flat_kernel(
                    a_flat, b_flat, modulus, num_elements
                )

                if gpu_result["success"]:
                    gpu_used = True
                    gpu_time = gpu_result["time"]
                    throughput = gpu_result["throughput"]

                    # Compare against the CPU baseline estimate.
                    cpu_time = self._cpu_field_addition_time(num_elements)
                    speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                    performance_metrics = {
                        "gpu_time": gpu_time,
                        "cpu_time": cpu_time,
                        "memory_bandwidth": self._estimate_memory_bandwidth(num_elements, gpu_time),
                        "gpu_utilization": self._estimate_gpu_utilization(num_elements),
                    }

                    logger.info(f"🚀 GPU field addition completed")
                    logger.info(f" GPU Time: {gpu_time:.4f}s")
                    logger.info(f" CPU Time: {cpu_time:.4f}s")
                    logger.info(f" Speedup: {speedup:.2f}x")
                else:
                    logger.warning("GPU operation failed, falling back to CPU")

            except Exception as e:
                logger.warning(f"GPU operation failed: {e}, falling back to CPU")

        # CPU fallback when the GPU path was skipped or failed.
        if not gpu_used:
            cpu_time = self._cpu_field_addition_time(num_elements)
            throughput = num_elements / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "cpu_throughput": throughput,
            }

        return ZKOperationResult(
            success=True,
            operation_type="field_addition",
            execution_time=time.time() - started,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"num_elements": num_elements},
            performance_metrics=performance_metrics,
        )

    async def _process_constraint_verification(self, request: ZKOperationRequest) -> ZKOperationResult:
        """Verify (or estimate verification of) a batch of constraints."""
        started = time.time()

        constraints = request.constraints or []
        num_constraints = len(constraints)
        if num_constraints == 0:
            # No constraints supplied: synthesize a test batch.
            num_constraints = request.circuit_data.get("num_constraints", 1000)
            constraints = self._generate_test_constraints(num_constraints)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            try:
                # NOTE(review): GPU time here is an estimate, not a real run.
                gpu_time = self._gpu_constraint_verification_time(num_constraints)
                gpu_used = True
                throughput = num_constraints / gpu_time if gpu_time > 0 else 0

                cpu_time = self._cpu_constraint_verification_time(num_constraints)
                speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                performance_metrics = {
                    "gpu_time": gpu_time,
                    "cpu_time": cpu_time,
                    "constraints_verified": num_constraints,
                    "verification_rate": throughput,
                }

                logger.info(f"🚀 GPU constraint verification completed")
                logger.info(f" Constraints: {num_constraints}")
                logger.info(f" Speedup: {speedup:.2f}x")

            except Exception as e:
                logger.warning(f"GPU constraint verification failed: {e}, falling back to CPU")

        if not gpu_used:
            cpu_time = self._cpu_constraint_verification_time(num_constraints)
            throughput = num_constraints / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "constraints_verified": num_constraints,
                "verification_rate": throughput,
            }

        return ZKOperationResult(
            success=True,
            operation_type="constraint_verification",
            execution_time=time.time() - started,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"num_constraints": num_constraints},
            performance_metrics=performance_metrics,
        )

    async def _process_witness_generation(self, request: ZKOperationRequest) -> ZKOperationResult:
        """Generate (or estimate generation of) a witness vector."""
        started = time.time()

        witness_data = request.witness_data or {}
        num_inputs = witness_data.get("num_inputs", 1000)
        witness_size = witness_data.get("witness_size", 10000)

        gpu_used = False
        speedup = None
        throughput = None
        performance_metrics = None

        if request.use_gpu and self.cuda_accelerator and self.initialized:
            try:
                # NOTE(review): GPU time here is an estimate, not a real run.
                gpu_time = self._gpu_witness_generation_time(num_inputs, witness_size)
                gpu_used = True
                throughput = witness_size / gpu_time if gpu_time > 0 else 0

                cpu_time = self._cpu_witness_generation_time(num_inputs, witness_size)
                speedup = cpu_time / gpu_time if gpu_time > 0 else 0

                performance_metrics = {
                    "gpu_time": gpu_time,
                    "cpu_time": cpu_time,
                    "witness_size": witness_size,
                    "generation_rate": throughput,
                }

                logger.info(f"🚀 GPU witness generation completed")
                logger.info(f" Witness Size: {witness_size}")
                logger.info(f" Speedup: {speedup:.2f}x")

            except Exception as e:
                logger.warning(f"GPU witness generation failed: {e}, falling back to CPU")

        if not gpu_used:
            cpu_time = self._cpu_witness_generation_time(num_inputs, witness_size)
            throughput = witness_size / cpu_time if cpu_time > 0 else 0
            performance_metrics = {
                "cpu_time": cpu_time,
                "witness_size": witness_size,
                "generation_rate": throughput,
            }

        return ZKOperationResult(
            success=True,
            operation_type="witness_generation",
            execution_time=time.time() - started,
            gpu_used=gpu_used,
            speedup=speedup,
            throughput=throughput,
            result_data={"witness_size": witness_size},
            performance_metrics=performance_metrics,
        )

    def _generate_field_data(self, num_elements: int) -> Tuple[np.ndarray, np.ndarray]:
        """Generate random 4-limb field operands as flat uint64 arrays."""
        flat_size = num_elements * 4
        lhs = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
        rhs = np.random.randint(0, 2**32, size=flat_size, dtype=np.uint64)
        return lhs, rhs

    def _generate_test_constraints(self, num_constraints: int) -> List[Dict[str, Any]]:
        """Generate random test constraints (a/b/c limb lists plus an op code)."""
        def _limbs():
            # Four random 32-bit limbs per operand.
            return [np.random.randint(0, 2**32) for _ in range(4)]

        return [
            {
                "a": _limbs(),
                "b": _limbs(),
                "c": _limbs(),
                "operation": np.random.choice([0, 1]),
            }
            for _ in range(num_constraints)
        ]

    def _cpu_field_addition_time(self, num_elements: int) -> float:
        """Estimated CPU field-addition time (benchmark: ~725K elements/s)."""
        return num_elements / 725000

    def _gpu_field_addition_time(self, num_elements: int) -> float:
        """Estimated GPU field-addition time (benchmark: ~120M elements/s)."""
        return num_elements / 120000000

    def _cpu_constraint_verification_time(self, num_constraints: int) -> float:
        """Estimated CPU constraint-verification time (~500K constraints/s)."""
        return num_constraints / 500000

    def _gpu_constraint_verification_time(self, num_constraints: int) -> float:
        """Estimated GPU constraint-verification time (~100M constraints/s)."""
        return num_constraints / 100000000

    def _cpu_witness_generation_time(self, num_inputs: int, witness_size: int) -> float:
        """Estimated CPU witness-generation time (~1M witness elements/s)."""
        return witness_size / 1000000

    def _gpu_witness_generation_time(self, num_inputs: int, witness_size: int) -> float:
        """Estimated GPU witness-generation time (~50M witness elements/s)."""
        return witness_size / 50000000

    def _estimate_memory_bandwidth(self, num_elements: int, gpu_time: float) -> float:
        """Estimate effective memory bandwidth in GB/s."""
        # 3 arrays * 4 limbs * 8 bytes * num_elements
        data_size_gb = (3 * 4 * 8 * num_elements) / (1024**3)
        return data_size_gb / gpu_time if gpu_time > 0 else 0

    def _estimate_gpu_utilization(self, num_elements: int) -> float:
        """Rough GPU utilization estimate based on workload size."""
        # (exclusive upper bound, estimated utilization %); first match wins.
        for bound, utilization in ((1000, 20.0), (10000, 60.0), (100000, 85.0)):
            if num_elements < bound:
                return utilization
        return 95.0  # very high utilization for large workloads

    def _update_average_speedup(self, new_speedup: float):
        """Fold a new speedup sample into the running average."""
        count = self.operation_stats["gpu_operations"]
        if count == 1:
            self.operation_stats["average_speedup"] = new_speedup
        else:
            prev_avg = self.operation_stats["average_speedup"]
            self.operation_stats["average_speedup"] = (
                (prev_avg * (count - 1) + new_speedup) / count
            )

    def get_performance_statistics(self) -> Dict[str, Any]:
        """Return a snapshot of usage counters plus derived rates."""
        stats = self.operation_stats.copy()
        total = stats["total_operations"]

        if total > 0:
            stats["average_execution_time"] = stats["total_time"] / total
            stats["gpu_usage_rate"] = stats["gpu_operations"] / total * 100
            stats["cpu_usage_rate"] = stats["cpu_operations"] / total * 100
        else:
            stats["average_execution_time"] = 0
            stats["gpu_usage_rate"] = 0
            stats["cpu_usage_rate"] = 0

        stats["cuda_available"] = CUDA_AVAILABLE
        stats["cuda_initialized"] = self.initialized
        # NOTE(review): device name is hard-coded rather than queried.
        stats["gpu_device"] = "NVIDIA GeForce RTX 4060 Ti" if self.cuda_accelerator else "N/A"

        return stats

    async def benchmark_comprehensive_performance(self, max_elements: int = 1000000) -> Dict[str, Any]:
        """Run comprehensive performance benchmark across several sizes."""
        logger.info(f"🚀 Running comprehensive performance benchmark up to {max_elements:,} elements")

        benchmark_results = {
            "field_addition": [],
            "constraint_verification": [],
            "witness_generation": [],
            "summary": {},
        }

        for size in [1000, 10000, 100000, max_elements]:
            logger.info(f"📊 Benchmarking {size:,} elements...")

            # Build one request per operation type; dict order preserves
            # the original execution order.
            requests = {
                "field_addition": ZKOperationRequest(
                    operation_type="field_addition",
                    circuit_data={"num_elements": size},
                    use_gpu=True,
                ),
                "constraint_verification": ZKOperationRequest(
                    operation_type="constraint_verification",
                    circuit_data={"num_constraints": size},
                    use_gpu=True,
                ),
                "witness_generation": ZKOperationRequest(
                    operation_type="witness_generation",
                    circuit_data={"num_inputs": size // 10},  # required circuit_data
                    witness_data={"num_inputs": size // 10, "witness_size": size},
                    use_gpu=True,
                ),
            }

            for op_name, op_request in requests.items():
                outcome = await self.process_zk_operation(op_request)
                benchmark_results[op_name].append({
                    "size": size,
                    "result": asdict(outcome),
                })

        benchmark_results["summary"] = self._calculate_benchmark_summary(benchmark_results)

        logger.info("✅ Comprehensive benchmark completed")
        return benchmark_results

    def _calculate_benchmark_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Summarize avg/max speedup and throughput per operation type."""
        summary = {}

        for op_name in ("field_addition", "constraint_verification", "witness_generation"):
            entries = results[op_name]

            speedups = [e["result"]["speedup"] for e in entries if e["result"]["speedup"]]
            throughputs = [e["result"]["throughput"] for e in entries if e["result"]["throughput"]]

            if speedups:
                summary[f"{op_name}_avg_speedup"] = sum(speedups) / len(speedups)
                summary[f"{op_name}_max_speedup"] = max(speedups)

            if throughputs:
                summary[f"{op_name}_avg_throughput"] = sum(throughputs) / len(throughputs)
                summary[f"{op_name}_max_throughput"] = max(throughputs)

        return summary
|
|
||||||
|
|
||||||
# Global API instance
cuda_zk_api = ProductionCUDAZKAPI()


async def main():
    """Smoke-test the production API: one request per operation type,
    then statistics and a comprehensive benchmark."""
    print("🚀 AITBC Production CUDA ZK API Test")
    print("=" * 50)

    try:
        # (label, request) pairs, run in the original order.
        test_cases = [
            ("Field Addition", ZKOperationRequest(
                operation_type="field_addition",
                circuit_data={"num_elements": 100000},
                use_gpu=True,
            )),
            ("Constraint Verification", ZKOperationRequest(
                operation_type="constraint_verification",
                circuit_data={"num_constraints": 50000},
                use_gpu=True,
            )),
            ("Witness Generation", ZKOperationRequest(
                operation_type="witness_generation",
                circuit_data={"num_inputs": 1000},  # required circuit_data
                witness_data={"num_inputs": 1000, "witness_size": 50000},
                use_gpu=True,
            )),
        ]

        for label, request in test_cases:
            print(f"\n📊 Testing {label}...")
            outcome = await cuda_zk_api.process_zk_operation(request)
            print(f" Result: {outcome.success}")
            print(f" GPU Used: {outcome.gpu_used}")
            print(f" Speedup: {outcome.speedup:.2f}x" if outcome.speedup else " Speedup: N/A")

        print("\n📊 Performance Statistics:")
        for key, value in cuda_zk_api.get_performance_statistics().items():
            print(f" {key}: {value}")

        print("\n🚀 Running Comprehensive Benchmark...")
        benchmark_results = await cuda_zk_api.benchmark_comprehensive_performance(100000)

        print("\n✅ Production API test completed successfully!")

    except Exception as e:
        print(f"❌ Test failed: {e}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
Reference in New Issue
Block a user