feat: add marketplace metrics, privacy features, and service registry endpoints
- Add Prometheus metrics for marketplace API throughput and error rates, with new dashboard panels
- Implement confidential transaction models with encryption support and access control
- Add key management system with registration, rotation, and audit logging
- Create services and registry routers for service discovery and management
- Integrate ZK proof generation for privacy-preserving receipts
- Add metrics instru
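The Prometheus instrumentation summarized above would, in a typical prometheus_client setup, look roughly like the sketch below; the metric names, label sets, and helper function are illustrative assumptions, not taken from this commit's diff:

from prometheus_client import Counter, Histogram

# Illustrative metric names; the commit's actual instrumentation may differ.
MARKETPLACE_REQUESTS = Counter(
    "marketplace_api_requests_total",
    "Marketplace API requests",
    ["endpoint", "status"],
)
MARKETPLACE_LATENCY = Histogram(
    "marketplace_api_request_seconds",
    "Marketplace API request latency",
    ["endpoint"],
)

def record_request(endpoint: str, status: int, seconds: float) -> None:
    # Error rate is then rate(...{status=~"5.."}) / rate(...) in PromQL.
    MARKETPLACE_REQUESTS.labels(endpoint=endpoint, status=str(status)).inc()
    MARKETPLACE_LATENCY.labels(endpoint=endpoint).observe(seconds)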
apps/blockchain-node/scripts/benchmark_throughput.py (new executable file, 277 lines)
@@ -0,0 +1,277 @@
#!/usr/bin/env python3
"""
Blockchain Node Throughput Benchmark

This script simulates sustained load on the blockchain node to measure:
- Transactions per second (TPS)
- Latency percentiles (p50, p95, p99)
- CPU and memory usage of the benchmark client
- Queue depth and saturation points

Usage:
    python benchmark_throughput.py --concurrent-clients 100 --duration 60 --target-url http://localhost:8080
"""
import argparse
import asyncio
import json
import logging
import statistics
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import aiohttp
import psutil

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@dataclass
class BenchmarkResult:
    """Results from a benchmark run"""
    total_transactions: int
    duration: float      # seconds
    tps: float
    latency_p50: float   # ms
    latency_p95: float   # ms
    latency_p99: float   # ms
    cpu_usage: float     # average client CPU, percent
    memory_usage: float  # average client RSS, MB
    errors: int
class BlockchainBenchmark:
    """Benchmark client for blockchain node"""

    def __init__(self, base_url: str):
        self.base_url = base_url.rstrip('/')
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30))
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def submit_transaction(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Submit a single transaction and record its round-trip latency."""
        start_time = time.time()
        try:
            async with self.session.post(
                f"{self.base_url}/v1/transactions",
                json=payload
            ) as response:
                if response.status == 200:
                    result = await response.json()
                    latency = (time.time() - start_time) * 1000  # ms
                    return {"success": True, "latency": latency, "tx_id": result.get("tx_id")}
                return {"success": False, "error": f"HTTP {response.status}"}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def get_block_height(self) -> int:
        """Get current block height (0 if the request fails)."""
        try:
            async with self.session.get(f"{self.base_url}/v1/blocks/head") as response:
                if response.status == 200:
                    data = await response.json()
                    return data.get("height", 0)
        except Exception:
            pass
        return 0
def generate_test_transaction(i: int) -> Dict[str, Any]:
    """Generate a deterministic test transaction for index i."""
    return {
        "from": f"0xtest_sender_{i % 100:040x}",
        "to": f"0xtest_receiver_{i % 50:040x}",
        "value": str((i + 1) * 1000),
        "nonce": i,
        "data": f"0x{hash(i) % 1000000:06x}",
        "gas_limit": 21000,
        "gas_price": "1000000000"  # 1 gwei
    }
async def worker_task(
    benchmark: BlockchainBenchmark,
    worker_id: int,
    transactions_per_worker: int,
    results: List[Dict[str, Any]]
) -> None:
    """Worker task that submits transactions sequentially."""
    logger.info(f"Worker {worker_id} starting")

    for i in range(transactions_per_worker):
        tx = generate_test_transaction(worker_id * transactions_per_worker + i)
        result = await benchmark.submit_transaction(tx)
        results.append(result)

        if not result["success"]:
            logger.warning(f"Worker {worker_id} transaction failed: {result.get('error', 'unknown')}")

    logger.info(f"Worker {worker_id} completed")
async def run_benchmark(
    base_url: str,
    concurrent_clients: int,
    duration: int,
    target_tps: Optional[int] = None
) -> BenchmarkResult:
    """Run the benchmark"""
    logger.info(f"Starting benchmark: {concurrent_clients} concurrent clients for {duration}s")

    # Start resource monitoring. Note: psutil.Process() samples this benchmark
    # client's own process, not the node under test.
    process = psutil.Process()
    cpu_samples = []
    memory_samples = []

    async def monitor_resources():
        while True:
            cpu_samples.append(process.cpu_percent())
            memory_samples.append(process.memory_info().rss / 1024 / 1024)  # MB
            await asyncio.sleep(1)

    # Calculate transactions needed
    if target_tps:
        total_transactions = target_tps * duration
    else:
        total_transactions = concurrent_clients * 100  # Default: 100 tx per client

    transactions_per_worker = total_transactions // concurrent_clients
    results = []

    async with BlockchainBenchmark(base_url) as benchmark:
        # Start resource monitor
        monitor_task = asyncio.create_task(monitor_resources())

        # Record start block height
        start_height = await benchmark.get_block_height()

        # Start benchmark
        start_time = time.time()

        # Create worker tasks; wrapping each coroutine in create_task gives
        # real Task objects that can be cancelled if the run times out
        tasks = [
            asyncio.create_task(worker_task(benchmark, i, transactions_per_worker, results))
            for i in range(concurrent_clients)
        ]

        # Wait for all tasks to complete or timeout
        try:
            await asyncio.wait_for(asyncio.gather(*tasks), timeout=duration)
        except asyncio.TimeoutError:
            logger.warning("Benchmark timed out")
            for task in tasks:
                task.cancel()

        end_time = time.time()
        actual_duration = end_time - start_time

        # Stop resource monitor
        monitor_task.cancel()

        # Get final block height
        end_height = await benchmark.get_block_height()

        # Calculate metrics
        successful_tx = [r for r in results if r["success"]]
        latencies = [r["latency"] for r in successful_tx if "latency" in r]

        if len(latencies) >= 2:
            latency_p50 = statistics.median(latencies)
            latency_p95 = statistics.quantiles(latencies, n=20)[18]   # 95th percentile
            latency_p99 = statistics.quantiles(latencies, n=100)[98]  # 99th percentile
        elif latencies:
            # statistics.quantiles() needs at least two samples
            latency_p50 = latency_p95 = latency_p99 = latencies[0]
        else:
            latency_p50 = latency_p95 = latency_p99 = 0

        tps = len(successful_tx) / actual_duration if actual_duration > 0 else 0
        avg_cpu = statistics.mean(cpu_samples) if cpu_samples else 0
        avg_memory = statistics.mean(memory_samples) if memory_samples else 0
        errors = len(results) - len(successful_tx)

        logger.info("Benchmark completed:")
        logger.info(f"  Duration: {actual_duration:.2f}s")
        logger.info(f"  Transactions: {len(successful_tx)} successful, {errors} failed")
        logger.info(f"  TPS: {tps:.2f}")
        logger.info(f"  Latency p50/p95/p99: {latency_p50:.2f}/{latency_p95:.2f}/{latency_p99:.2f}ms")
        logger.info(f"  CPU Usage: {avg_cpu:.1f}%")
        logger.info(f"  Memory Usage: {avg_memory:.1f}MB")
        logger.info(f"  Blocks processed: {end_height - start_height}")

        return BenchmarkResult(
            total_transactions=len(successful_tx),
            duration=actual_duration,
            tps=tps,
            latency_p50=latency_p50,
            latency_p95=latency_p95,
            latency_p99=latency_p99,
            cpu_usage=avg_cpu,
            memory_usage=avg_memory,
            errors=errors
        )
async def main():
    parser = argparse.ArgumentParser(description="Blockchain Node Throughput Benchmark")
    parser.add_argument("--target-url", default="http://localhost:8080",
                        help="Blockchain node RPC URL")
    parser.add_argument("--concurrent-clients", type=int, default=50,
                        help="Number of concurrent client connections")
    parser.add_argument("--duration", type=int, default=60,
                        help="Benchmark duration in seconds")
    parser.add_argument("--target-tps", type=int,
                        help="Target TPS to achieve (calculates transaction count)")
    parser.add_argument("--output", help="Output results to JSON file")

    args = parser.parse_args()

    # Run benchmark
    result = await run_benchmark(
        base_url=args.target_url,
        concurrent_clients=args.concurrent_clients,
        duration=args.duration,
        target_tps=args.target_tps
    )

    # Output results
    if args.output:
        with open(args.output, "w") as f:
            json.dump({
                "total_transactions": result.total_transactions,
                "duration": result.duration,
                "tps": result.tps,
                "latency_p50": result.latency_p50,
                "latency_p95": result.latency_p95,
                "latency_p99": result.latency_p99,
                "cpu_usage": result.cpu_usage,
                "memory_usage": result.memory_usage,
                "errors": result.errors
            }, f, indent=2)
        logger.info(f"Results saved to {args.output}")

    # Provide scaling recommendations
    logger.info("\n=== Scaling Recommendations ===")
    if result.tps < 100:
        logger.info("• Low TPS detected. Consider optimizing transaction processing")
    if result.latency_p95 > 1000:
        logger.info("• High latency detected. Consider increasing resources or optimizing database queries")
    if result.cpu_usage > 80:
        logger.info("• High CPU usage. Horizontal scaling recommended")
    if result.memory_usage > 1024:
        logger.info("• High memory usage. Monitor for memory leaks")

    logger.info("\nRecommended minimum resources for current load:")
    logger.info(f"• CPU: {result.cpu_usage * 1.5:.0f}% (with headroom)")
    logger.info(f"• Memory: {result.memory_usage * 1.5:.0f}MB (with headroom)")
    logger.info(f"• Horizontal scaling threshold: ~{result.tps * 0.7:.0f} TPS per node")


if __name__ == "__main__":
    asyncio.run(main())
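Once a run is saved with --output, the JSON can feed a simple regression gate. A minimal sketch, assuming the run was saved as results.json; the threshold values are illustrative and not taken from this commit:

import json
import sys

# Hypothetical CI gate: fail the pipeline when a saved benchmark run
# regresses below chosen thresholds.
MIN_TPS = 500        # illustrative threshold
MAX_P95_MS = 1000    # illustrative threshold

with open("results.json") as f:
    r = json.load(f)

ok = r["tps"] >= MIN_TPS and r["latency_p95"] <= MAX_P95_MS
print(f"tps={r['tps']:.1f} p95={r['latency_p95']:.1f}ms -> {'PASS' if ok else 'FAIL'}")
sys.exit(0 if ok else 1)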
apps/blockchain-node/scripts/test_autoscaling.py (new executable file, 279 lines)
@@ -0,0 +1,279 @@
#!/usr/bin/env python3
"""
Autoscaling Validation Script

This script generates synthetic traffic to test and validate HPA behavior.
It monitors pod counts and metrics while generating load to ensure autoscaling works as expected.

Usage:
    python test_autoscaling.py --service coordinator --namespace default --target-url http://localhost:8011 --duration 300
"""
import argparse
import asyncio
import json
import logging
import subprocess
import sys
import time
from datetime import datetime
from typing import Any, Dict

import aiohttp

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class AutoscalingTest:
    """Test suite for validating autoscaling behavior"""

    def __init__(self, service_name: str, namespace: str, target_url: str):
        self.service_name = service_name
        self.namespace = namespace
        self.target_url = target_url
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30))
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()
    async def get_pod_count(self) -> int:
        """Get the current number of Running pods for the service."""
        # No shell is involved, so the jsonpath template must not be quoted
        cmd = [
            "kubectl", "get", "pods",
            "-n", self.namespace,
            "-l", f"app.kubernetes.io/name={self.service_name}",
            "-o", "jsonpath={.items[*].status.phase}"
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            # Count Running pods
            phases = result.stdout.strip().split()
            return len([p for p in phases if p == "Running"])
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to get pod count: {e}")
            return 0
    async def get_hpa_status(self) -> Dict[str, Any]:
        """Get current HPA status"""
        cmd = [
            "kubectl", "get", "hpa",
            "-n", self.namespace,
            self.service_name,
            "-o", "json"
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            data = json.loads(result.stdout)

            # Extract the target CPU utilization from the metrics spec
            # (must happen before returning, not after)
            target_cpu = None
            for metric in data["spec"].get("metrics", []):
                if metric["type"] == "Resource" and metric["resource"]["name"] == "cpu":
                    target_cpu = metric["resource"]["target"]["averageUtilization"]
                    break

            return {
                "min_replicas": data["spec"]["minReplicas"],
                "max_replicas": data["spec"]["maxReplicas"],
                "current_replicas": data["status"]["currentReplicas"],
                "desired_replicas": data["status"]["desiredReplicas"],
                "current_cpu": data["status"].get("currentCPUUtilizationPercentage"),
                "target_cpu": target_cpu
            }
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to get HPA status: {e}")
            return {}
    async def generate_load(self, duration: int, concurrent_requests: int = 50):
        """Generate sustained load on the service"""
        logger.info(f"Generating load for {duration}s with {concurrent_requests} concurrent requests")

        async def make_request():
            try:
                if self.service_name == "coordinator":
                    # Test marketplace endpoints (crude pseudo-random selection)
                    endpoints = [
                        "/v1/marketplace/offers",
                        "/v1/marketplace/stats"
                    ]
                    endpoint = endpoints[hash(time.time()) % len(endpoints)]
                    async with self.session.get(f"{self.target_url}{endpoint}") as response:
                        return response.status == 200
                elif self.service_name == "blockchain-node":
                    # Test blockchain endpoints
                    payload = {
                        "from": "0xtest_sender",
                        "to": "0xtest_receiver",
                        "value": "1000",
                        "nonce": int(time.time()),
                        "data": "0x",
                        "gas_limit": 21000,
                        "gas_price": "1000000000"
                    }
                    async with self.session.post(f"{self.target_url}/v1/transactions", json=payload) as response:
                        return response.status == 200
                else:
                    # Generic health check
                    async with self.session.get(f"{self.target_url}/v1/health") as response:
                        return response.status == 200
            except Exception as e:
                logger.debug(f"Request failed: {e}")
                return False

        # Generate sustained load in batches of concurrent requests
        start_time = time.time()

        while time.time() - start_time < duration:
            batch = [make_request() for _ in range(concurrent_requests)]
            await asyncio.gather(*batch, return_exceptions=True)

            # Brief pause between batches
            await asyncio.sleep(0.1)

        logger.info("Load generation completed")
    async def monitor_scaling(self, duration: int, interval: int = 10):
        """Monitor pod scaling during the load test"""
        logger.info(f"Monitoring scaling for {duration}s")

        results = []
        start_time = time.time()

        while time.time() - start_time < duration:
            timestamp = datetime.now().isoformat()
            pod_count = await self.get_pod_count()
            hpa_status = await self.get_hpa_status()

            results.append({
                "timestamp": timestamp,
                "pod_count": pod_count,
                "hpa_status": hpa_status
            })
            logger.info(f"[{timestamp}] Pods: {pod_count}, HPA: {hpa_status}")

            await asyncio.sleep(interval)

        return results
    async def run_test(self, load_duration: int = 300, monitor_duration: int = 400):
        """Run the complete autoscaling test"""
        logger.info(f"Starting autoscaling test for {self.service_name}")

        # Record initial state
        initial_pods = await self.get_pod_count()
        initial_hpa = await self.get_hpa_status()
        logger.info(f"Initial state - Pods: {initial_pods}, HPA: {initial_hpa}")

        # Start monitoring in the background
        monitor_task = asyncio.create_task(
            self.monitor_scaling(monitor_duration)
        )

        # Wait a bit to establish a baseline
        await asyncio.sleep(30)

        # Generate load
        await self.generate_load(load_duration)

        # Wait for scaling to stabilize
        await asyncio.sleep(60)

        # Get monitoring results
        monitoring_results = await monitor_task

        # Analyze results
        max_pods = max(r["pod_count"] for r in monitoring_results)
        min_pods = min(r["pod_count"] for r in monitoring_results)
        scaled_up = max_pods > initial_pods

        logger.info("\n=== Test Results ===")
        logger.info(f"Initial pods: {initial_pods}")
        logger.info(f"Min pods during test: {min_pods}")
        logger.info(f"Max pods during test: {max_pods}")
        logger.info(f"Scaling occurred: {scaled_up}")

        if scaled_up:
            logger.info("✅ Autoscaling test PASSED - Service scaled up under load")
        else:
            logger.warning("⚠️ Autoscaling test FAILED - Service did not scale up")
            logger.warning("Check:")
            logger.warning("  - HPA configuration")
            logger.warning("  - Metrics server is running")
            logger.warning("  - Resource requests/limits are set")
            logger.warning("  - Load was sufficient to trigger scaling")

        # Save results
        results_file = f"autoscaling_test_{self.service_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(results_file, "w") as f:
            json.dump({
                "service": self.service_name,
                "namespace": self.namespace,
                "initial_pods": initial_pods,
                "max_pods": max_pods,
                "min_pods": min_pods,
                "scaled_up": scaled_up,
                "monitoring_data": monitoring_results
            }, f, indent=2)

        logger.info(f"Detailed results saved to: {results_file}")

        return scaled_up
async def main():
    parser = argparse.ArgumentParser(description="Autoscaling Validation Test")
    parser.add_argument("--service", required=True,
                        choices=["coordinator", "blockchain-node", "wallet-daemon"],
                        help="Service to test")
    parser.add_argument("--namespace", default="default",
                        help="Kubernetes namespace")
    parser.add_argument("--target-url", required=True,
                        help="Service URL to generate load against")
    parser.add_argument("--load-duration", type=int, default=300,
                        help="Duration of load generation in seconds")
    parser.add_argument("--monitor-duration", type=int, default=400,
                        help="Total monitoring duration in seconds")
    parser.add_argument("--local-mode", action="store_true",
                        help="Run in local mode without Kubernetes (load test only)")

    args = parser.parse_args()

    if not args.local_mode:
        # Verify kubectl is available and can reach the cluster
        try:
            subprocess.run(["kubectl", "version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            logger.error("kubectl is not available or not configured")
            logger.info("Use --local-mode to run the load test without Kubernetes monitoring")
            sys.exit(1)

    # Run test
    async with AutoscalingTest(args.service, args.namespace, args.target_url) as test:
        if args.local_mode:
            # Local mode: just exercise load generation
            logger.info(f"Running load test for {args.service} in local mode")
            await test.generate_load(args.load_duration)
            logger.info("Load test completed successfully")
            success = True
        else:
            # Full autoscaling test
            success = await test.run_test(args.load_duration, args.monitor_duration)

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    asyncio.run(main())
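For interpreting the captured HPA data: the Kubernetes HPA controller computes its desired replica count as ceil(currentReplicas × currentMetric / targetMetric), clamped to the min/max bounds. A small sketch of that formula, useful for cross-checking the pod counts recorded above (the function name and clamping helper are ours, not part of the script):

import math

def expected_replicas(current_replicas: int, current_cpu: float,
                      target_cpu: float, min_replicas: int, max_replicas: int) -> int:
    """Kubernetes HPA formula: desired = ceil(current * currentMetric / targetMetric),
    clamped to [minReplicas, maxReplicas]."""
    desired = math.ceil(current_replicas * current_cpu / target_cpu)
    return max(min_replicas, min(max_replicas, desired))

# Example: 2 pods at 90% CPU against a 50% target should settle at 4 pods.
assert expected_replicas(2, 90, 50, 1, 10) == 4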