feat: add foreign key constraints and metrics for blockchain node
This commit is contained in:
43
apps/blockchain-node/observability/README.md
Normal file
43
apps/blockchain-node/observability/README.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Blockchain Node Observability
|
||||
|
||||
This directory contains Prometheus and Grafana assets for the devnet environment. The stack relies on the HTTP `/metrics` endpoints exposed by:
|
||||
|
||||
1. The blockchain node API (`http://127.0.0.1:8080/metrics`).
|
||||
2. The mock coordinator/miner exporter (`http://127.0.0.1:8090/metrics`).
|
||||
|
||||
## Files
|
||||
|
||||
- `prometheus.yml` – Scrapes both blockchain node and mock coordinator/miner metrics.
|
||||
- `grafana-dashboard.json` – Panels for block interval, RPC throughput, miner activity, coordinator receipt flow, **plus new gossip queue, subscriber, and publication rate panels**.
|
||||
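- `alerts.yml` – Prometheus alerting rules (loaded through `rule_files` in `prometheus.yml`; firing alerts are forwarded to Alertmanager) highlighting proposer stalls, miner errors, and coordinator receipt drop-offs.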
|
||||
- `gossip-recording-rules.yml` – Prometheus recording rules that derive queue/subscriber gauges and publication rates from gossip metrics.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Launch Prometheus using the sample config
|
||||
prometheus --config.file=apps/blockchain-node/observability/prometheus.yml
|
||||
|
||||
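# Import the dashboard JSON into Grafana. grafana-cli has no dashboard import
# command, so use the Grafana UI (Dashboards → New → Import) and upload:
#   apps/blockchain-node/observability/grafana-dashboard.json
# The panels reference a Prometheus datasource with uid "PROMETHEUS_DS".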
|
||||
|
||||
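# Run Alertmanager on 127.0.0.1:9093 (the address prometheus.yml sends alerts to).
# alerts.yml is a Prometheus rule file loaded via `rule_files`, not an
# Alertmanager config, so point Alertmanager at its own configuration file.
alertmanager --config.file=/path/to/alertmanager.yml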
|
||||
|
||||
# Reload Prometheus and Alertmanager after tuning thresholds
|
||||
kill -HUP $(pgrep prometheus)
|
||||
kill -HUP $(pgrep alertmanager)
|
||||
```
|
||||
|
||||
> **Tip:** The devnet helper `scripts/devnet_up.sh` seeds the metrics endpoints. After running it, both scrape targets will begin emitting data in under a minute.
|
||||
|
||||
## Gossip Observability
|
||||
|
||||
Recent updates instrumented the gossip broker with Prometheus counters and gauges. Key metrics surfaced via the recording rules and dashboard include:
|
||||
|
||||
- `gossip_publications_rate_per_sec` and `gossip_broadcast_publications_rate_per_sec` – per-second publication throughput for in-memory and broadcast backends.
|
||||
- `gossip_publications_topic_rate_per_sec` – topic-level publication rate time series (Grafana panel “Gossip Publication Rate by Topic”).
|
||||
- `gossip_queue_size_by_topic` – instantaneous queue depth per topic (“Gossip Queue Depth by Topic”).
|
||||
- `gossip_subscribers_by_topic`, `gossip_subscribers_total`, `gossip_broadcast_subscribers_total` – subscriber counts (“Gossip Subscriber Counts”).
|
||||
|
||||
Use these panels to monitor convergence/back-pressure during load tests (for example with `scripts/ws_load_test.py`) when running against a Redis-backed broadcast backend.
|
||||
43
apps/blockchain-node/observability/alerts.yml
Normal file
43
apps/blockchain-node/observability/alerts.yml
Normal file
@ -0,0 +1,43 @@
|
||||
# Prometheus alerting rules for the devnet blockchain node and the mock
# coordinator/miner exporter.
groups:
  - name: blockchain-node
    rules:
      # Windowed mean block interval above 5s.
      # rate(_sum) / rate(_count) yields the mean over the window; dividing the
      # raw cumulative series would give the lifetime mean, which barely moves
      # when production slows down.
      # NOTE(review): assumes block_interval_seconds_{sum,count} are the
      # cumulative series of a histogram/summary - confirm against the exporter.
      - alert: BlockProposalStalled
        expr: rate(block_interval_seconds_sum[2m]) / rate(block_interval_seconds_count[2m]) > 5
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Block production interval exceeded 5s"
          description: |
            Average block interval is {{ $value }} seconds, exceeding the expected cadence.

      # Windowed mean block interval above 10s, or no interval observations at
      # all in the window. The second branch is needed because a full halt
      # makes the ratio 0/0 (NaN), which never satisfies `> 10`.
      # NOTE(review): the zero-rate branch presumes an observation is recorded
      # only when a block is produced - confirm against the proposer loop.
      - alert: BlockProposalDown
        expr: |-
          (
            rate(block_interval_seconds_sum[2m])
              / rate(block_interval_seconds_count[2m]) > 10
          )
          or
          rate(block_interval_seconds_count[2m]) == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Block production halted"
          description: |
            Block intervals have spiked above 10 seconds for more than two minutes.
            Check proposer loop and database state.

      # Any non-zero value of the miner mock's error gauge, sustained for 1m.
      - alert: MinerErrorsDetected
        expr: miner_error_rate > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Miner mock reporting errors"
          description: |
            The miner mock error gauge is {{ $value }}. Investigate miner telemetry.

      # The attested-receipt counter has not increased over a 5m window, and
      # that condition has held for a further 5m.
      - alert: CoordinatorReceiptDrop
        expr: rate(miner_receipts_attested_total[5m]) == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "No receipts attested in 5 minutes"
          description: |
            Receipt attestations ceased during the last five minutes. Inspect coordinator connectivity.
|
||||
@ -0,0 +1,36 @@
|
||||
# Prometheus recording rules that derive per-second publication rates and
# per-topic queue/subscriber series from the gossip broker metrics.
groups:
  - name: gossip_metrics
    interval: 15s
    rules:
      # Overall publication throughput of the in-memory backend.
      - record: gossip_publications_rate_per_sec
        expr: rate(gossip_publications_total[1m])

      # Overall publication throughput of the broadcast backend.
      - record: gossip_broadcast_publications_rate_per_sec
        expr: rate(gossip_broadcast_publications_total[1m])

      # Per-topic publication rate.
      # The topic is copied out of the metric name BEFORE rate() runs: rate()
      # drops __name__, so wrapping rate() in label_replace() would leave
      # `topic` empty and collapse all topics into identical label sets.
      # rate() needs a range vector, hence the subquery over the relabelled
      # instant vector. The __name__!= matcher stops the rule from consuming
      # its own recorded output, which also matches the regex.
      # NOTE(review): `topic` is whatever follows the prefix in the source
      # metric name (including any suffix) - confirm the exporter's naming.
      - record: gossip_publications_topic_rate_per_sec
        expr: |-
          rate(
            label_replace(
              {
                __name__=~"gossip_publications_topic_.*",
                __name__!="gossip_publications_topic_rate_per_sec"
              },
              "topic", "$1", "__name__", "gossip_publications_topic_(.*)"
            )[1m:]
          )

      # Instantaneous queue depth per topic. The recorded series itself is
      # excluded because its name also matches gossip_queue_size_.*.
      - record: gossip_queue_size_by_topic
        expr: |-
          label_replace(
            {
              __name__=~"gossip_queue_size_.*",
              __name__!="gossip_queue_size_by_topic"
            },
            "topic", "$1", "__name__", "gossip_queue_size_(.*)"
          )

      # Subscriber count per topic. The recorded name does not match the
      # selector regex, so no self-exclusion is needed here.
      - record: gossip_subscribers_by_topic
        expr: |-
          label_replace(
            {__name__=~"gossip_subscribers_topic_.*"},
            "topic", "$1", "__name__", "gossip_subscribers_topic_(.*)"
          )
|
||||
377
apps/blockchain-node/observability/grafana-dashboard.json
Normal file
377
apps/blockchain-node/observability/grafana-dashboard.json
Normal file
@ -0,0 +1,377 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "AITBC devnet observability for blockchain node, coordinator, and miner mock.",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"iteration": 1727420700000,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
  "expr": "rate(block_interval_seconds_sum[5m]) / rate(block_interval_seconds_count[5m])",
  "legendFormat": "avg block interval",
  "refId": "A"
}
|
||||
],
|
||||
"title": "Block Interval (seconds)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(rpc_send_tx_total[5m])",
|
||||
"legendFormat": "sendTx",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(rpc_submit_receipt_total[5m])",
|
||||
"legendFormat": "submitReceipt",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(rpc_get_head_total[5m])",
|
||||
"legendFormat": "getHead",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "RPC Throughput",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "miner_active_jobs",
|
||||
"legendFormat": "active jobs",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "miner_error_rate",
|
||||
"legendFormat": "error gauge",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Miner Activity",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(miner_receipts_attested_total[5m])",
|
||||
"legendFormat": "receipts attested",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(miner_receipts_unknown_total[5m])",
|
||||
"legendFormat": "unknown receipts",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Coordinator Receipt Flow",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "gossip_queue_size_by_topic",
|
||||
"legendFormat": "{{topic}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Gossip Queue Depth by Topic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "gossip_subscribers_by_topic",
|
||||
"legendFormat": "{{topic}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "gossip_subscribers_total",
|
||||
"legendFormat": "total subscribers",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "gossip_broadcast_subscribers_total",
|
||||
"legendFormat": "broadcast subscribers",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Gossip Subscriber Counts",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "gossip_publications_rate_per_sec",
|
||||
"legendFormat": "memory backend",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "gossip_broadcast_publications_rate_per_sec",
|
||||
"legendFormat": "broadcast backend",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Gossip Publication Rate (total)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "gossip_publications_topic_rate_per_sec",
|
||||
"legendFormat": "{{topic}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Gossip Publication Rate by Topic",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"aitbc",
|
||||
"blockchain-node"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "AITBC Blockchain Node",
|
||||
"uid": null,
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
28
apps/blockchain-node/observability/prometheus.yml
Normal file
28
apps/blockchain-node/observability/prometheus.yml
Normal file
@ -0,0 +1,28 @@
|
||||
# Devnet Prometheus configuration: scrapes the blockchain node and the mock
# coordinator/miner exporter, evaluates the alerting and gossip recording
# rules, and forwards alerts to a local Alertmanager.
global:
  scrape_interval: 5s
  evaluation_interval: 10s

# Alerting rules and gossip recording rules shipped alongside this file.
rule_files:
  - alerts.yml
  - gossip-recording-rules.yml

# Alertmanager instance that receives firing alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["127.0.0.1:9093"]

scrape_configs:
  # Blockchain node API metrics endpoint.
  - job_name: "blockchain-node"
    static_configs:
      - targets: ["127.0.0.1:8080"]
        labels:
          service: "blockchain-node"

  # Mock coordinator/miner exporter.
  - job_name: "mock-coordinator"
    static_configs:
      - targets: ["127.0.0.1:8090"]
        labels:
          service: "mock-coordinator"
|
||||
Reference in New Issue
Block a user