- Delete AIPowerRental.sol (566 lines) - replaced by AgentBounty.sol - Remove rental agreement system with provider/consumer model - Remove performance metrics and SLA tracking - Remove dispute resolution mechanism - Remove ZK-proof verification for performance - Remove provider/consumer authorization system - Bounty system provides superior developer incentive structure
393 lines
11 KiB
YAML
393 lines
11 KiB
YAML
# AITBC Advanced Agent Features - production monitoring stack.
# Docker Compose definition of the monitoring services.
version: "3.8"

# Every monitoring container is declared under this key.
services:
|
|
# Cross-Chain Reputation Monitoring: Prometheus server published on host port 9090.
  reputation-monitor:
    image: prom/prometheus:v2.45.0
    container_name: reputation-monitor
    restart: unless-stopped
    networks:
      - monitoring
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration and rule files come from the project directory.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./rules:/etc/prometheus/rules
      # Named volume backing the TSDB path given to --storage.tsdb.path below.
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Retention is bounded by both age and size.
      - "--storage.tsdb.retention.time=200h"
      - "--storage.tsdb.retention.size=50GB"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      # NOTE(review): lifecycle and admin APIs are enabled while 9090 is
      # published on the host - confirm access to this port is restricted.
      - "--web.enable-lifecycle"
      - "--web.enable-admin-api"
    healthcheck:
      # Exec form: wget's exit status decides container health.
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:9090/-/healthy"
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      service: "monitoring"
      component: "prometheus"
|
|
|
|
# Agent Communication Monitoring: Grafana UI published on host port 3001.
  communication-monitor:
    image: grafana/grafana:10.0.0
    container_name: communication-monitor
    ports:
      # GF_SERVER_HTTP_PORT below makes Grafana listen on 3001 inside the
      # container, so the published port must target 3001 (not the image
      # default 3000) or nothing answers on the host side.
      - "3001:3001"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # Fail at `docker compose up` time instead of silently starting with an
      # empty admin password when the variable is missing.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_USERS_ALLOW_ORG_CREATE=false
      - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel,grafana-clock-panel,grafana-simple-json-datasource
      - GF_SERVER_ROOT_URL=http://localhost:3001
      - GF_SERVER_DOMAIN=localhost
      - GF_SERVER_PROTOCOL=http
      - GF_SERVER_HTTP_PORT=3001
      - GF_SERVER_ENFORCE_DOMAIN=false
      - GF_SMTP_ENABLED=false
      - GF_LOG_LEVEL=info
      - GF_LOG_MODE=file
      - GF_PATHS_LOGS=/var/log/grafana
      - GF_PATHS_PLUGINS=/var/lib/grafana/plugins
      - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
      - GF_PATHS_DATA=/var/lib/grafana
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
      # Named volumes matching GF_PATHS_DATA and GF_PATHS_LOGS above.
      - grafana-data:/var/lib/grafana
      - grafana-logs:/var/log/grafana
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # CMD-SHELL runs through a shell, so `|| exit 1` is valid here.
      test: ["CMD-SHELL", "curl -f http://localhost:3001/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=grafana"
|
|
|
|
# Advanced Learning Monitoring: Node.js app run from ./learning-monitor via `npm start`.
  learning-monitor:
    image: node:18-alpine
    container_name: learning-monitor
    working_dir: /app
    volumes:
      # Application source is bind-mounted; logs go to a named volume.
      - ./learning-monitor:/app
      - learning-monitor-logs:/app/logs
    command: npm start
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) passes each element as a literal argument with no
      # shell, so a trailing `|| exit 1` would become part of the URL.
      # wget's own non-zero exit status already marks the check as failed.
      # NOTE(review): assumes the app serves /health on port 8002 - confirm.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8002/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=learning-monitor"
|
|
|
|
# Agent Collaboration Monitoring: Node.js app run from ./collaboration-monitor via `npm start`.
  collaboration-monitor:
    image: node:18-alpine
    container_name: collaboration-monitor
    working_dir: /app
    volumes:
      # Application source is bind-mounted; logs go to a named volume.
      - ./collaboration-monitor:/app
      - collaboration-monitor-logs:/app/logs
    command: npm start
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) passes each element as a literal argument with no
      # shell, so a trailing `|| exit 1` would become part of the URL.
      # wget's own non-zero exit status already marks the check as failed.
      # NOTE(review): assumes the app serves /health on port 8003 - confirm.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8003/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=collaboration-monitor"
|
|
|
|
# Log Aggregation: Fluent Bit reading host logs mounted read-only.
  log-aggregator:
    image: fluent/fluent-bit:v2.2.0
    container_name: log-aggregator
    volumes:
      - ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf
      # Host log directories, mounted read-only.
      - /var/log:/var/log:ro
      - /var/log/containers:/var/log/containers:ro
      - /var/log/pods:/var/log/pods:ro
      - fluent-bit-data:/var/log/fluent-bit
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form needs the executable and each argument as separate
      # elements; "fluent-bit --version" as one string is looked up as a
      # single binary name and always fails.
      # NOTE(review): /fluent-bit/bin/fluent-bit is the binary path in the
      # official image - verify for this tag. This only proves the binary
      # runs, not that the pipeline is healthy.
      test: ["CMD", "/fluent-bit/bin/fluent-bit", "--version"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=fluent-bit"
|
|
|
|
# Alert Manager: Prometheus Alertmanager published on host port 9093.
  alert-manager:
    image: prom/alertmanager:v0.25.0
    container_name: alert-manager
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
      # Named volume backing --storage.path below.
      - alertmanager-data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=http://localhost:9093'
      - '--web.route-prefix=/'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=alertmanager"
|
|
|
|
# Node Exporter (System Metrics): host metrics published on host port 9100.
  node-exporter:
    image: prom/node-exporter:v1.6.0
    container_name: node-exporter
    ports:
      - "9100:9100"
    volumes:
      # Host filesystems mounted read-only; the --path.* flags below point
      # the exporter at these mount locations.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is Compose's escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9100/metrics"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=node-exporter"
|
|
|
|
# Process Exporter (Application Metrics): per-process metrics published on host port 9256.
  process-exporter:
    image: ncabatoff/process-exporter:v0.7.0
    container_name: process-exporter
    ports:
      - "9256:9256"
    volumes:
      # Host /proc mounted read-only at the location given to --procfs.
      - /proc:/host/proc:ro
    command:
      # process-exporter's procfs flag is `procfs`; `--path.procfs` is a
      # node-exporter flag and is rejected as undefined here, so only the
      # supported flag is kept.
      # NOTE(review): no -config.path / -procnames is supplied - confirm
      # which processes are meant to be tracked.
      - '--procfs=/host/proc'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      # NOTE(review): confirm this image actually ships a wget binary.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9256/metrics"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=process-exporter"
|
|
|
|
# Blackbox Exporter (External Monitoring): probe exporter published on host port 9115.
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.22.0
    container_name: blackbox-exporter
    ports:
      - "9115:9115"
    volumes:
      - ./blackbox.yml:/etc/blackbox/blackbox.yml
    command:
      # The image's default command reads its bundled config from a
      # different path, so the file mounted above is ignored unless it is
      # passed explicitly.
      - '--config.file=/etc/blackbox/blackbox.yml'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9115/metrics"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=blackbox-exporter"
|
|
|
|
# Loki (Log Aggregation): log store published on host port 3100.
  loki:
    image: grafana/loki:2.9.0
    container_name: loki
    ports:
      - "3100:3100"
    volumes:
      # Project config is mounted at the path passed to -config.file below.
      - ./loki.yml:/etc/loki/local-config.yaml
      - loki-data:/loki
    command:
      - '-config.file=/etc/loki/local-config.yaml'
      - '-config.expand-env=true'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3100/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=loki"
|
|
|
|
# Promtail (Log Collection): reads host logs mounted read-only.
  promtail:
    image: grafana/promtail:2.9.0
    container_name: promtail
    volumes:
      - ./promtail.yml:/etc/promtail/config.yml
      # Host log directories, mounted read-only.
      - /var/log:/var/log:ro
      - /var/log/containers:/var/log/containers:ro
      - /var/log/pods:/var/log/pods:ro
    command:
      - '-config.file=/etc/promtail/config.yml'
      - '-config.expand-env=true'
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      # NOTE(review): probes port 9081 - confirm it matches the HTTP listen
      # port configured in ./promtail.yml and that the image ships wget.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9081/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=promtail"
|
|
|
|
# Jaeger (Tracing): all-in-one image with its UI published on host port 16686.
  jaeger:
    image: jaegertracing/all-in-one:1.47.0
    container_name: jaeger
    ports:
      - "16686:16686"
      - "14250:14250"
      - "14268:14268"
      - "14269:14269"
      - "5778:5778"
      # NOTE(review): published as TCP; confirm whether 5775 was meant to
      # be a UDP mapping ("5775:5775/udp").
      - "5775:5775"
    environment:
      # NOTE(review): values are kept as found; confirm each variable name
      # and host:port format against the Jaeger release in use.
      - COLLECTOR_ZIPKIN_HOST_PORT=9411
      - COLLECTOR_OTLP_HOST_PORT=14250
      - COLLECTOR_JAEGER_AGENT_HOST_PORT=14268
      - QUERY_BASE_PATH=/
      - SPAN_STORAGE_TYPE=badger
      - BADGER_EPHEMERAL_SPREAD=2000000
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:16686/"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=jaeger"
|
|
|
|
# Redis (Cache): append-only persistence, capped at 512mb with LRU eviction.
  redis:
    image: redis:7.2.3-alpine
    container_name: redis
    restart: unless-stopped
    networks:
      - monitoring
    ports:
      - "6379:6379"
    volumes:
      # Named volume holding the append-only file.
      - redis-data:/data
    # Same arguments as the single-string form, one element per token.
    # "yes" is quoted so YAML keeps it a string rather than a boolean.
    command:
      - "redis-server"
      - "--appendonly"
      - "yes"
      - "--maxmemory"
      - "512mb"
      - "--maxmemory-policy"
      - "allkeys-lru"
    healthcheck:
      # Exec form: redis-cli's exit status decides container health.
      test:
        - "CMD"
        - "redis-cli"
        - "ping"
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      service: "monitoring"
      component: "redis"
|
|
|
|
# Grafana Loki Data Source
# NOTE(review): despite the service name, the image and component label
# below are the Grafana image renderer - confirm the intended role.
  grafana-loki-datasource:
    image: grafana/grafana-image-renderer:3.8.2
    container_name: grafana-loki-datasource
    ports:
      - "8081:8081"
    environment:
      - RENDERING_SERVER_URL=http://localhost:8081/render
      - RENDERING_CALLBACK_URL=http://localhost:3001/
      - RENDERING_HOST=localhost
    restart: unless-stopped
    networks:
      - monitoring
    healthcheck:
      # Exec form (CMD) runs without a shell, so `|| exit 1` must not be
      # appended to the URL; wget's exit status already signals failure.
      # NOTE(review): confirm the renderer exposes /health on 8081.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8081/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    labels:
      - "service=monitoring"
      - "component=grafana-image-renderer"
|
|
|
|
# Bridge network shared by every service in this stack.
networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        # Subnet and gateway belong to the same address pool, so they must
        # be keys of ONE list item; as two separate items the second entry
        # declares a gateway with no subnet.
        - subnet: 172.20.0.0/16
          gateway: 172.20.0.1
|
|
|
|
# Named volumes referenced by the services; all use the local driver.
volumes:
  # Metrics and alerting state
  prometheus-data: {driver: local}
  alertmanager-data: {driver: local}

  # Grafana data and log files
  grafana-data: {driver: local}
  grafana-logs: {driver: local}

  # Log pipeline storage
  fluent-bit-data: {driver: local}
  loki-data: {driver: local}

  # Cache persistence
  redis-data: {driver: local}

  # Log directories of the Node.js monitors
  learning-monitor-logs: {driver: local}
  collaboration-monitor-logs: {driver: local}
|
|
|
|
# Logging configuration
# `logging` is a per-service option, not a top-level Compose element, and an
# unknown top-level key fails validation. It is kept as an `x-` extension
# field, which Compose ignores. To put it into effect, add a `logging:`
# section with these values to each service that should use it.
x-logging:
  driver: "json-file"
  options:
    # Rotate each log file at 10 MB and keep at most three files.
    max-size: "10m"
    max-file: "3"
|