feat: add marketplace metrics, privacy features, and service registry endpoints
- Add Prometheus metrics for marketplace API throughput and error rates with new dashboard panels
- Implement confidential transaction models with encryption support and access control
- Add key management system with registration, rotation, and audit logging
- Create services and registry routers for service discovery and management
- Integrate ZK proof generation for privacy-preserving receipts
- Add metrics instru… [message truncated in source]
This commit is contained in:
124
infra/helm/charts/monitoring/values.yaml
Normal file
124
infra/helm/charts/monitoring/values.yaml
Normal file
@@ -0,0 +1,124 @@
|
||||
# Default values for aitbc-monitoring.
|
||||
|
||||
# Prometheus configuration
# NOTE(review): nesting below is reconstructed from a listing that had lost its
# indentation — confirm against the chart templates before relying on it.
prometheus:
  enabled: true

  # Settings for the Prometheus server itself.
  server:
    enabled: true
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    retention: 30d
    persistentVolume:
      enabled: true
      size: 100Gi
    resources:
      limits:
        cpu: 2000m
        memory: 4Gi
      requests:
        cpu: 1000m
        memory: 2Gi
    service:
      type: ClusterIP
      port: 9090

  # NOTE(review): presumably restricts discovery to ServiceMonitor objects
  # labelled `release: monitoring`; this key may belong under `server`
  # instead — verify against the templates that read it.
  serviceMonitors:
    enabled: true
    selector:
      release: monitoring

  # Alertmanager is disabled; the config below only applies once it is
  # switched on.
  alertmanager:
    enabled: false
    config:
      global:
        resolve_timeout: 5m
      route:
        group_by:
          - alertname
        group_wait: 10s
        group_interval: 10s
        repeat_interval: 1h
        receiver: 'web.hook'
      receivers:
        - name: 'web.hook'
          webhook_configs:
            # Loopback address: replace with a real receiver endpoint before
            # enabling Alertmanager.
            - url: 'http://127.0.0.1:5001/'
|
||||
|
||||
# Grafana configuration
# NOTE(review): nesting below is reconstructed from a listing that had lost its
# indentation — confirm against the chart templates before relying on it.
grafana:
  enabled: true

  # SECURITY: do not ship a default admin credential in version control.
  # Supply the password at install time (for example
  # `--set grafana.adminPassword=<value>`) or source it from a Kubernetes
  # secret.
  # NOTE(review): confirm how the chart templates treat an empty value here.
  adminPassword: ""

  persistence:
    enabled: true
    size: 20Gi

  resources:
    limits:
      cpu: 1000m
      memory: 2Gi
    requests:
      cpu: 500m
      memory: 1Gi

  service:
    type: ClusterIP
    port: 3000

  # Provisioned datasources.
  datasources:
    datasources.yaml:
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          # NOTE(review): assumes the Prometheus service resolves as
          # `prometheus-server` on port 9090 in the same namespace — verify
          # against the actual (possibly release-prefixed) service name.
          url: http://prometheus-server:9090
          access: proxy
          isDefault: true

  # Provisioned dashboard providers (file-based).
  dashboardProviders:
    dashboardproviders.yaml:
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: ''
          type: file
          disableDeletion: false
          editable: true
          options:
            path: /var/lib/grafana/dashboards/default
|
||||
|
||||
# Service monitors for AITBC services
# Each entry carries the same four settings: an on/off switch, the scrape
# interval, the metrics path and the name of the port to scrape.
# NOTE(review): nesting reconstructed from a listing that had lost its
# indentation — confirm against the chart templates.
serviceMonitors:
  # Coordinator
  coordinator:
    enabled: true
    interval: 30s
    path: /metrics
    port: http

  # Blockchain node
  blockchainNode:
    enabled: true
    interval: 30s
    path: /metrics
    port: http

  # Wallet daemon
  walletDaemon:
    enabled: true
    interval: 30s
    path: /metrics
    port: http
|
||||
|
||||
# Alert rules
# NOTE(review): nesting reconstructed from a listing that had lost its
# indentation — confirm against the chart templates.
alertRules:
  enabled: true
  groups:
    - name: aitbc.rules
      rules:
        # Fires when more than 10% of marketplace requests fail, sustained
        # for 5 minutes. Both sides are aggregated with sum() so the ratio is
        # still produced when the two counters carry different label sets;
        # a bare vector/vector division only matches series whose labels are
        # identical and silently yields nothing otherwise.
        # NOTE(review): this alerts on the overall ratio; use `sum by (...)`
        # on both sides if a per-endpoint or per-instance alert is wanted.
        - alert: HighErrorRate
          expr: sum(rate(marketplace_errors_total[5m])) / sum(rate(marketplace_requests_total[5m])) > 0.1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "High error rate detected"
            description: "Error rate is above 10% for 5 minutes"

        # Fires when the coordinator scrape target has reported down for
        # 1 minute.
        # NOTE(review): assumes the scrape job is named `coordinator`; this
        # expression does not fire if the target is missing entirely.
        - alert: CoordinatorDown
          expr: up{job="coordinator"} == 0
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "Coordinator is down"
            description: "Coordinator API has been down for more than 1 minute"
|
||||
Reference in New Issue
Block a user