Based on the repository's commit message style and the changes in the diff, here's an appropriate commit message:

```
feat: add websocket tests, PoA metrics, marketplace endpoints, and enhanced observability

- Add comprehensive websocket tests for blocks and transactions streams including multi-subscriber and high-volume scenarios
- Extend PoA consensus with per-proposer block metrics and rotation tracking
- Add latest block interval gauge and RPC error spike alerting
- Enhance mock coordinator
This commit is contained in:
oib
2025-12-22 07:55:09 +01:00
parent fb60505cdf
commit d98b2c7772
70 changed files with 3472 additions and 246 deletions

View File

@ -8,7 +8,7 @@ This directory contains Prometheus and Grafana assets for the devnet environment
## Files
- `prometheus.yml` — Scrapes both the blockchain node and the mock coordinator/miner metrics.
- `grafana-dashboard.json` — Panels for block interval, RPC throughput, miner activity, coordinator receipt flow, **plus new gossip queue, subscriber, and publication rate panels**.
- `grafana-dashboard.json` — Panels for block interval (including latest interval gauge), RPC throughput, miner activity, coordinator receipt flow, gossip queue/subscriber/publication metrics, and PoA proposer visibility (rotation counts, blocks proposed per proposer).
- `alerts.yml` — Prometheus alerting rules highlighting proposer stalls, miner errors, and coordinator receipt drop-offs.
- `gossip-recording-rules.yml` — Prometheus recording rules that derive queue/subscriber gauges and publication rates from gossip metrics.

View File

@ -41,3 +41,13 @@ groups:
summary: "No receipts attested in 5 minutes"
description: |
Receipt attestations ceased during the last five minutes. Inspect coordinator connectivity.
- alert: RpcErrorsSpiking
expr: increase(rpc_request_failures_total[5m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: "RPC error spike detected"
description: |
RPC request failures have increased during the last five minutes. Investigate rpc_request_failures_total for details.

View File

@ -0,0 +1,255 @@
{
"annotations": {
"list": []
},
"editable": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(blockchain_block_height[1m])",
"refId": "A"
}
],
"title": "Block Production Rate (blocks/s)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(mempool_queue_depth[1m])",
"refId": "A"
}
],
"title": "Mempool Queue Depth",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(miner_queue_depth[1m])",
"refId": "A"
}
],
"title": "Miner Queue Depth",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(miner_job_duration_seconds_sum[1m]) / avg_over_time(miner_job_duration_seconds_count[1m])",
"refId": "A"
}
],
"title": "Miner Job Duration Seconds",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"aitbc",
"blockchain"
],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "AITBC Blockchain Node",
"uid": "aitbc-node",
"version": 1
}

View File

@ -0,0 +1,322 @@
{
"annotations": {
"list": []
},
"editable": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_submitted_total[1m])",
"refId": "A"
}
],
"title": "Jobs Submitted",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_completed_total[1m])",
"refId": "A"
}
],
"title": "Jobs Completed",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_failed_total[1m])",
"refId": "A"
}
],
"title": "Jobs Failed",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 5
},
{
"color": "red",
"value": 10
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "miner_active_jobs",
"refId": "A"
}
],
"title": "Active Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 5
},
{
"color": "red",
"value": 10
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 8
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "miner_error_rate",
"refId": "A"
}
],
"title": "Miner Error Rate",
"type": "stat"
}
],
"refresh": "10s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"aitbc",
"coordinator"
],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "AITBC Coordinator Overview",
"uid": "aitbc-coordinator",
"version": 1
}

View File

@ -352,6 +352,80 @@
],
"title": "Gossip Publication Rate by Topic",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PROMETHEUS_DS"
},
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"id": 9,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "increase(poa_proposer_rotations_total[30m])",
"legendFormat": "rotations (30m)",
"refId": "A"
}
],
"title": "Proposer Rotation Count",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PROMETHEUS_DS"
},
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 32
},
"id": 10,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "label_replace(sum(rate({__name__=~\"poa_blocks_proposed_total_.*\"}[5m])) by (__name__), \"proposer\", \"$1\", \"__name__\", \"poa_blocks_proposed_total_(.*)\")",
"legendFormat": "{{proposer}}",
"refId": "A"
}
],
"title": "Blocks Proposed per Proposer (5m rate)",
"type": "timeseries"
}
],
"refresh": "10s",