feat: add websocket tests, PoA metrics, marketplace endpoints, and enhanced observability

- Add comprehensive websocket tests for the blocks and transactions streams, including multi-subscriber and high-volume scenarios
- Extend PoA consensus with per-proposer block metrics and rotation tracking
- Add latest block interval gauge and RPC error spike alerting
- Enhance mock coordinator
This commit is contained in:
23
apps/blockchain-node/data/devnet/genesis.json
Normal file
23
apps/blockchain-node/data/devnet/genesis.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"accounts": [
|
||||
{
|
||||
"address": "ait1faucet000000000000000000000000000000000",
|
||||
"balance": 1000000000,
|
||||
"nonce": 0
|
||||
}
|
||||
],
|
||||
"authorities": [
|
||||
{
|
||||
"address": "ait1devproposer000000000000000000000000000000",
|
||||
"weight": 1
|
||||
}
|
||||
],
|
||||
"chain_id": "ait-devnet",
|
||||
"params": {
|
||||
"base_fee": 10,
|
||||
"coordinator_ratio": 0.05,
|
||||
"fee_per_byte": 1,
|
||||
"mint_per_unit": 1000
|
||||
},
|
||||
"timestamp": 1766383019
|
||||
}
|
||||
@@ -8,7 +8,7 @@ This directory contains Prometheus and Grafana assets for the devnet environment
|
||||
## Files
|
||||
|
||||
- `prometheus.yml` – Scrapes both blockchain node and mock coordinator/miner metrics.
|
||||
- `grafana-dashboard.json` – Panels for block interval, RPC throughput, miner activity, coordinator receipt flow, **plus new gossip queue, subscriber, and publication rate panels**.
|
||||
- `grafana-dashboard.json` – Panels for block interval (including latest interval gauge), RPC throughput, miner activity, coordinator receipt flow, gossip queue/subscriber/publication metrics, and PoA proposer visibility (rotation counts, blocks proposed per proposer).
|
||||
- `alerts.yml` – Prometheus alerting rules highlighting proposer stalls, miner errors, coordinator receipt drop-offs, and RPC error spikes.
|
||||
- `gossip-recording-rules.yml` – Prometheus recording rules that derive queue/subscriber gauges and publication rates from gossip metrics.
|
||||
|
||||
|
||||
@@ -41,3 +41,13 @@ groups:
|
||||
summary: "No receipts attested in 5 minutes"
|
||||
description: |
|
||||
Receipt attestations ceased during the last five minutes. Inspect coordinator connectivity.
|
||||
|
||||
- alert: RpcErrorsSpiking
|
||||
expr: increase(rpc_request_failures_total[5m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "RPC error spike detected"
|
||||
description: |
|
||||
RPC request failures have increased during the last five minutes. Investigate rpc_request_failures_total for details.
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "rate(blockchain_block_height[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Block Production Interval (seconds)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "avg_over_time(mempool_queue_depth[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Mempool Queue Depth",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "avg_over_time(miner_queue_depth[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Miner Queue Depth",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "avg_over_time(miner_job_duration_seconds_sum[1m]) / avg_over_time(miner_job_duration_seconds_count[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Miner Job Duration Seconds",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"aitbc",
|
||||
"blockchain"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "AITBC Blockchain Node",
|
||||
"uid": "aitbc-node",
|
||||
"version": 1
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "rate(coordinator_jobs_submitted_total[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Jobs Submitted",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "rate(coordinator_jobs_completed_total[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Jobs Completed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "rate(coordinator_jobs_failed_total[1m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Jobs Failed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 5
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 10
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "miner_active_jobs",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Active Jobs",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 5
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 10
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "miner_error_rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Miner Error Rate",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"aitbc",
|
||||
"coordinator"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "AITBC Coordinator Overview",
|
||||
"uid": "aitbc-coordinator",
|
||||
"version": 1
|
||||
}
|
||||
@@ -352,6 +352,80 @@
|
||||
],
|
||||
"title": "Gossip Publication Rate by Topic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 32
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "increase(poa_proposer_rotations_total[30m])",
|
||||
"legendFormat": "rotations (30m)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Proposer Rotation Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PROMETHEUS_DS"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 32
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(sum(rate({__name__=~\"poa_blocks_proposed_total_.*\"}[5m])) by (__name__), \"proposer\", \"$1\", \"__name__\", \"poa_blocks_proposed_total_(.*)\")",
|
||||
"legendFormat": "{{proposer}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Blocks Proposed per Proposer (5m rate)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
|
||||
0
apps/blockchain-node/scripts/devnet_up.sh
Normal file → Executable file
0
apps/blockchain-node/scripts/devnet_up.sh
Normal file → Executable file
@@ -3,9 +3,11 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import random
|
||||
import time
|
||||
from typing import Dict
|
||||
from collections import deque
|
||||
from typing import Deque, Dict, List
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import PlainTextResponse
|
||||
@@ -14,21 +16,78 @@ from aitbc_chain.metrics import metrics_registry
|
||||
|
||||
app = FastAPI(title="Mock Coordinator API", version="0.1.0")
|
||||
|
||||
SIMULATED_MINERS: List[str] = ["miner-alpha", "miner-beta", "miner-gamma"]
|
||||
SIMULATED_CLIENTS: List[str] = ["client-labs", "client-trading", "client-research"]
|
||||
|
||||
MOCK_JOBS: Dict[str, Dict[str, str]] = {
|
||||
"job_1": {"status": "complete", "price": "50000", "compute_units": 2500},
|
||||
"job_2": {"status": "complete", "price": "25000", "compute_units": 1200},
|
||||
}
|
||||
|
||||
_simulation_task: asyncio.Task | None = None
|
||||
_job_rollup: Deque[str] = deque(maxlen=120)
|
||||
|
||||
|
||||
def _simulate_miner_metrics() -> None:
    """Publish one round of randomized miner metrics to ``metrics_registry``.

    Sets the ``miner_active_jobs`` and ``miner_error_rate`` gauges and records
    exactly one observation each for ``miner_job_duration_seconds`` and
    ``miner_queue_depth``.
    """
    # Each metric is written once per call: a second set_gauge would make the
    # first a dead store, and a second duration observation drawn from a
    # different range would skew the recorded distribution.
    active_jobs = random.randint(1, 6)
    metrics_registry.set_gauge("miner_active_jobs", float(active_jobs))
    metrics_registry.set_gauge("miner_error_rate", float(random.randint(0, 1)))
    metrics_registry.observe("miner_job_duration_seconds", random.uniform(1.5, 8.0))
    metrics_registry.observe("miner_queue_depth", float(random.randint(0, 12)))
|
||||
|
||||
|
||||
async def _simulation_loop() -> None:
    """Fabricate mock jobs and coordinator metrics forever, until cancelled.

    Each iteration refreshes the miner metrics, creates one randomized job in
    ``MOCK_JOBS``, evicts the oldest simulated job once the table outgrows the
    ``_job_rollup`` window, updates the coordinator counters/observations, and
    then sleeps for a random 1.5-3.5 seconds.
    """
    # job_1 and job_2 are seeded statically in MOCK_JOBS, so numbering starts at 3.
    job_counter = 3
    while True:
        _simulate_miner_metrics()

        job_id = f"job_{job_counter}"
        client = random.choice(SIMULATED_CLIENTS)
        miner = random.choice(SIMULATED_MINERS)
        price = random.randint(15_000, 75_000)
        compute_units = random.randint(750, 5000)
        status = random.choice(["complete", "pending", "failed"])

        MOCK_JOBS[job_id] = {
            "status": status,
            "price": str(price),
            "compute_units": compute_units,
            "client": client,
            "assigned_miner": miner,
        }
        _job_rollup.append(job_id)

        # Keep the job table bounded by dropping the oldest simulated job.
        if len(MOCK_JOBS) > _job_rollup.maxlen:
            oldest = _job_rollup.popleft()
            MOCK_JOBS.pop(oldest, None)

        metrics_registry.increment("coordinator_jobs_submitted_total")
        metrics_registry.observe("coordinator_job_price", float(price))
        metrics_registry.observe("coordinator_job_compute_units", float(compute_units))

        # Only terminal states move a counter; a "pending" job is neither
        # failed nor completed and must not inflate the completed total.
        if status == "failed":
            metrics_registry.increment("coordinator_jobs_failed_total")
        elif status == "complete":
            metrics_registry.increment("coordinator_jobs_completed_total")

        job_counter += 1
        await asyncio.sleep(random.uniform(1.5, 3.5))
|
||||
|
||||
|
||||
@app.on_event("startup")
async def _startup() -> None:
    """Emit an initial miner metrics sample, then launch the simulation task."""
    global _simulation_task
    _simulate_miner_metrics()
    loop_coro = _simulation_loop()
    # Keep a module-level handle so the shutdown hook can cancel the task.
    _simulation_task = asyncio.create_task(loop_coro)
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def _shutdown() -> None:
    """Cancel the background simulation task, if one is running, and clear it."""
    global _simulation_task
    task = _simulation_task
    if not task:
        return
    task.cancel()
    # Awaiting a cancelled task raises CancelledError; that is the expected
    # outcome here, so it is suppressed.
    with contextlib.suppress(asyncio.CancelledError):
        await task
    _simulation_task = None
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -4,12 +4,21 @@ import asyncio
|
||||
import hashlib
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import re
|
||||
from typing import Callable, ContextManager, Optional
|
||||
|
||||
from sqlmodel import Session, select
|
||||
|
||||
from ..logging import get_logger
|
||||
from ..metrics import metrics_registry
|
||||
|
||||
|
||||
_METRIC_KEY_SANITIZE = re.compile(r"[^0-9a-zA-Z]+")
|
||||
|
||||
|
||||
def _sanitize_metric_suffix(value: str) -> str:
|
||||
sanitized = _METRIC_KEY_SANITIZE.sub("_", value).strip("_")
|
||||
return sanitized or "unknown"
|
||||
from ..models import Block
|
||||
from ..gossip import gossip_broker
|
||||
|
||||
@@ -33,6 +42,7 @@ class PoAProposer:
|
||||
self._logger = get_logger(__name__)
|
||||
self._stop_event = asyncio.Event()
|
||||
self._task: Optional[asyncio.Task[None]] = None
|
||||
self._last_proposer_id: Optional[str] = None
|
||||
|
||||
async def start(self) -> None:
|
||||
if self._task is not None:
|
||||
@@ -104,6 +114,13 @@ class PoAProposer:
|
||||
metrics_registry.set_gauge("chain_head_height", float(next_height))
|
||||
if interval_seconds is not None and interval_seconds >= 0:
|
||||
metrics_registry.observe("block_interval_seconds", interval_seconds)
|
||||
metrics_registry.set_gauge("poa_last_block_interval_seconds", float(interval_seconds))
|
||||
|
||||
proposer_suffix = _sanitize_metric_suffix(self._config.proposer_id)
|
||||
metrics_registry.increment(f"poa_blocks_proposed_total_{proposer_suffix}")
|
||||
if self._last_proposer_id is not None and self._last_proposer_id != self._config.proposer_id:
|
||||
metrics_registry.increment("poa_proposer_rotations_total")
|
||||
self._last_proposer_id = self._config.proposer_id
|
||||
|
||||
asyncio.create_task(
|
||||
gossip_broker.publish(
|
||||
|
||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import re
|
||||
from typing import List, Optional
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import field_validator
|
||||
from sqlalchemy import Column
|
||||
@@ -34,8 +34,8 @@ class Block(SQLModel, table=True):
|
||||
tx_count: int = 0
|
||||
state_root: Optional[str] = None
|
||||
|
||||
transactions: List["Transaction"] = Relationship(back_populates="block")
|
||||
receipts: List["Receipt"] = Relationship(back_populates="block")
|
||||
transactions: list["Transaction"] = Relationship(back_populates="block")
|
||||
receipts: list["Receipt"] = Relationship(back_populates="block")
|
||||
|
||||
@field_validator("hash", mode="before")
|
||||
@classmethod
|
||||
@@ -69,7 +69,7 @@ class Transaction(SQLModel, table=True):
|
||||
)
|
||||
created_at: datetime = Field(default_factory=datetime.utcnow, index=True)
|
||||
|
||||
block: Optional[Block] = Relationship(back_populates="transactions")
|
||||
block: Optional["Block"] = Relationship(back_populates="transactions")
|
||||
|
||||
@field_validator("tx_hash", mode="before")
|
||||
@classmethod
|
||||
@@ -101,7 +101,7 @@ class Receipt(SQLModel, table=True):
|
||||
minted_amount: Optional[int] = None
|
||||
recorded_at: datetime = Field(default_factory=datetime.utcnow, index=True)
|
||||
|
||||
block: Optional[Block] = Relationship(back_populates="receipts")
|
||||
block: Optional["Block"] = Relationship(back_populates="receipts")
|
||||
|
||||
@field_validator("receipt_id", mode="before")
|
||||
@classmethod
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
"""Observability tooling for the AITBC blockchain node."""
|
||||
|
||||
from .dashboards import generate_default_dashboards
|
||||
from .exporters import register_exporters
|
||||
|
||||
__all__ = [
|
||||
"generate_default_dashboards",
|
||||
"register_exporters",
|
||||
]
|
||||
267
apps/blockchain-node/src/aitbc_chain/observability/dashboards.py
Normal file
267
apps/blockchain-node/src/aitbc_chain/observability/dashboards.py
Normal file
@@ -0,0 +1,267 @@
|
||||
"""Generate Grafana dashboards for the devnet observability stack."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable
|
||||
|
||||
|
||||
def _timeseries_panel(
|
||||
panel_id: int,
|
||||
title: str,
|
||||
expr: str,
|
||||
grid_x: int,
|
||||
grid_y: int,
|
||||
datasource_uid: str,
|
||||
) -> Dict[str, object]:
|
||||
return {
|
||||
"datasource": {"type": "prometheus", "uid": datasource_uid},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "red", "value": 80},
|
||||
],
|
||||
},
|
||||
},
|
||||
"overrides": [],
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": grid_x, "y": grid_y},
|
||||
"id": panel_id,
|
||||
"options": {
|
||||
"legend": {"displayMode": "list", "placement": "bottom"},
|
||||
"tooltip": {"mode": "multi", "sort": "none"},
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": datasource_uid},
|
||||
"expr": expr,
|
||||
"refId": "A",
|
||||
}
|
||||
],
|
||||
"title": title,
|
||||
"type": "timeseries",
|
||||
}
|
||||
|
||||
|
||||
def _stat_panel(
|
||||
panel_id: int,
|
||||
title: str,
|
||||
expr: str,
|
||||
grid_x: int,
|
||||
grid_y: int,
|
||||
datasource_uid: str,
|
||||
) -> Dict[str, object]:
|
||||
return {
|
||||
"datasource": {"type": "prometheus", "uid": datasource_uid},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "orange", "value": 5},
|
||||
{"color": "red", "value": 10},
|
||||
],
|
||||
},
|
||||
},
|
||||
"overrides": [],
|
||||
},
|
||||
"gridPos": {"h": 4, "w": 6, "x": grid_x, "y": grid_y},
|
||||
"id": panel_id,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||
"textMode": "auto",
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": datasource_uid},
|
||||
"expr": expr,
|
||||
"refId": "A",
|
||||
}
|
||||
],
|
||||
"title": title,
|
||||
"type": "stat",
|
||||
}
|
||||
|
||||
|
||||
def _coordinator_dashboard(datasource_uid: str) -> Dict[str, object]:
    """Assemble the "AITBC Coordinator Overview" dashboard definition.

    Contains four time-series panels (job submission/completion/failure rates
    and average bid price) followed by three stat panels (active jobs, miner
    error rate, average compute units), all bound to ``datasource_uid``.
    """
    # Each spec is (panel_id, title, expr, grid_x, grid_y); list order is panel order.
    series_specs = (
        (1, "Jobs Submitted", "rate(coordinator_jobs_submitted_total[1m])", 0, 0),
        (2, "Jobs Completed", "rate(coordinator_jobs_completed_total[1m])", 12, 0),
        (3, "Jobs Failed", "rate(coordinator_jobs_failed_total[1m])", 0, 8),
        (6, "Average Bid Price", "avg_over_time(coordinator_job_price[5m])", 12, 8),
    )
    stat_specs = (
        (4, "Active Jobs", "miner_active_jobs", 0, 16),
        (5, "Miner Error Rate", "miner_error_rate", 6, 16),
        (7, "Avg Compute Units", "avg_over_time(coordinator_job_compute_units[5m])", 12, 16),
    )

    panels = [
        _timeseries_panel(
            panel_id=pid,
            title=name,
            expr=query,
            grid_x=col,
            grid_y=row,
            datasource_uid=datasource_uid,
        )
        for pid, name, query, col, row in series_specs
    ]
    panels.extend(
        _stat_panel(
            panel_id=pid,
            title=name,
            expr=query,
            grid_x=col,
            grid_y=row,
            datasource_uid=datasource_uid,
        )
        for pid, name, query, col, row in stat_specs
    )

    return {
        "uid": "aitbc-coordinator",
        "title": "AITBC Coordinator Overview",
        "editable": True,
        "tags": ["aitbc", "coordinator"],
        "timezone": "",
        "schemaVersion": 38,
        "version": 1,
        "refresh": "10s",
        "style": "dark",
        "annotations": {"list": []},
        "templating": {"list": []},
        "time": {"from": "now-5m", "to": "now"},
        "timepicker": {},
        "panels": panels,
    }
|
||||
|
||||
|
||||
def _node_dashboard(datasource_uid: str) -> Dict[str, object]:
    """Assemble the "AITBC Blockchain Node" dashboard definition.

    Contains six time-series panels: block production interval, mempool queue
    depth, proposer rotation count, miner queue depth, miner job duration and
    RPC 95th-percentile latency, all bound to ``datasource_uid``.
    """
    # Each spec is (panel_id, title, expr, grid_x, grid_y); list order is panel order.
    series_specs = (
        (1, "Block Production Interval (seconds)", "1 / rate(blockchain_block_height[1m])", 0, 0),
        (2, "Mempool Queue Depth", "avg_over_time(mempool_queue_depth[1m])", 12, 0),
        (5, "Proposer Rotation Count", "increase(poa_proposer_rotations_total[5m])", 0, 8),
        (3, "Miner Queue Depth", "avg_over_time(miner_queue_depth[1m])", 12, 8),
        (
            4,
            "Miner Job Duration Seconds",
            "avg_over_time(miner_job_duration_seconds_sum[1m]) / avg_over_time(miner_job_duration_seconds_count[1m])",
            0,
            16,
        ),
        (
            6,
            "RPC 95th Percentile Latency",
            "histogram_quantile(0.95, sum(rate(rpc_request_duration_seconds_bucket[5m])) by (le))",
            12,
            16,
        ),
    )

    panels = [
        _timeseries_panel(
            panel_id=pid,
            title=name,
            expr=query,
            grid_x=col,
            grid_y=row,
            datasource_uid=datasource_uid,
        )
        for pid, name, query, col, row in series_specs
    ]

    return {
        "uid": "aitbc-node",
        "title": "AITBC Blockchain Node",
        "editable": True,
        "tags": ["aitbc", "blockchain"],
        "timezone": "",
        "schemaVersion": 38,
        "version": 1,
        "refresh": "10s",
        "style": "dark",
        "annotations": {"list": []},
        "templating": {"list": []},
        "time": {"from": "now-5m", "to": "now"},
        "timepicker": {},
        "panels": panels,
    }
|
||||
|
||||
|
||||
def _dashboard_payloads(datasource_uid: str) -> Iterable[tuple[str, Dict[str, object]]]:
    """Return ``(filename, dashboard)`` pairs for every default dashboard.

    The coordinator dashboard comes first, then the blockchain node dashboard.
    """
    coordinator_entry = ("coordinator-overview.json", _coordinator_dashboard(datasource_uid))
    node_entry = ("blockchain-node-overview.json", _node_dashboard(datasource_uid))
    return (coordinator_entry, node_entry)
|
||||
|
||||
|
||||
def generate_default_dashboards(output_dir: Path, datasource_uid: str = "${DS_PROMETHEUS}") -> None:
    """Write the default Grafana dashboards as JSON files into ``output_dir``.

    Parameters
    ----------
    output_dir:
        Destination directory for the generated files. Missing parent
        directories are created; an existing directory is reused.
    datasource_uid:
        Prometheus datasource UID embedded in every panel and target.
        Defaults to the literal ``"${DS_PROMETHEUS}"`` placeholder.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    payloads = _dashboard_payloads(datasource_uid)
    for filename, payload in payloads:
        # sort_keys keeps the exported JSON stable between runs.
        with open(output_dir / filename, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
@@ -0,0 +1,17 @@
|
||||
"""Placeholder exporter registration for metrics/log sinks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
# Names of every exporter requested so far, in registration order.
REGISTERED_EXPORTERS: list[str] = []


def register_exporters(exporters: Iterable[str]) -> None:
    """Record the names of the exporters requested for observability pipelines.

    Real implementations might wire Prometheus registrations, log shippers,
    or tracing exporters. For now the names are only appended to
    ``REGISTERED_EXPORTERS`` to keep track of requested sinks.

    Parameters
    ----------
    exporters:
        Exporter names to record. A single bare string is accepted and
        recorded as one name.
    """
    # A str is itself an iterable of characters; without this guard
    # register_exporters("loki") would register "l", "o", "k", "i".
    if isinstance(exporters, str):
        exporters = (exporters,)
    REGISTERED_EXPORTERS.extend(exporters)
|
||||
@@ -1,8 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlmodel import SQLModel, Session, create_engine
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent / "src"
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from aitbc_chain.models import Block, Transaction, Receipt # noqa: F401 - ensure models imported for metadata
|
||||
|
||||
|
||||
|
||||
39
apps/blockchain-node/tests/test_observability_dashboards.py
Normal file
39
apps/blockchain-node/tests/test_observability_dashboards.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Tests for the observability dashboard helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from aitbc_chain.observability.dashboards import generate_default_dashboards
|
||||
from aitbc_chain.observability import exporters
|
||||
|
||||
|
||||
def test_generate_default_dashboards_creates_files(tmp_path: Path) -> None:
    """The generator writes exactly the two expected dashboards, each well-formed."""
    output_dir = tmp_path / "dashboards"
    generate_default_dashboards(output_dir, datasource_uid="prometheus")

    written = list(output_dir.glob("*.json"))
    assert {path.name for path in written} == {
        "blockchain-node-overview.json",
        "coordinator-overview.json",
    }

    known_uids = {"aitbc-coordinator", "aitbc-node"}
    for dashboard_file in written:
        payload = json.loads(dashboard_file.read_text(encoding="utf-8"))

        assert payload["uid"] in known_uids
        assert payload["title"].startswith("AITBC")
        assert payload["panels"], "Dashboard should contain at least one panel"
|
||||
|
||||
|
||||
def test_register_exporters_tracks_names() -> None:
    """Registered exporter names are recorded in the order they were given."""
    # Start from a clean registry; the list is module-level state.
    exporters.REGISTERED_EXPORTERS.clear()

    requested = ["prometheus", "loki"]
    exporters.register_exporters(requested)

    assert exporters.REGISTERED_EXPORTERS == ["prometheus", "loki"]
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from contextlib import ExitStack
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
@@ -10,8 +11,6 @@ from aitbc_chain.gossip import gossip_broker
|
||||
|
||||
def _publish(topic: str, message: dict) -> None:
    """Synchronously publish ``message`` on ``topic`` through the gossip broker."""
    # ``publish`` is a coroutine; ``asyncio.run`` drives it to completion so
    # the synchronous tests can call this helper directly.
    pending = gossip_broker.publish(topic, message)
    asyncio.run(pending)
|
||||
|
||||
|
||||
def test_blocks_websocket_stream() -> None:
|
||||
client = TestClient(create_app())
|
||||
|
||||
@@ -28,19 +27,100 @@ def test_blocks_websocket_stream() -> None:
|
||||
assert message == payload
|
||||
|
||||
|
||||
def test_transactions_websocket_stream() -> None:
|
||||
def test_blocks_websocket_multiple_subscribers_receive_all_payloads() -> None:
    """Three ``/rpc/ws/blocks`` subscribers each receive every block, in order."""
    zero_hash = "0x" + "0" * 64

    def block_at(height: int) -> dict:
        # The first block has no predecessor, so it points at the zero hash.
        if height > 0:
            parent_hash = "0x" + f"{height - 1:064x}"
        else:
            parent_hash = zero_hash
        return {
            "height": height,
            "hash": "0x" + f"{height:064x}",
            "parent_hash": parent_hash,
            "timestamp": f"2025-01-01T00:00:{height:02d}Z",
            "tx_count": height % 3,
        }

    with TestClient(create_app()) as client, ExitStack() as stack:
        # ExitStack closes every websocket when the block exits.
        sockets = []
        for _ in range(3):
            connection = client.websocket_connect("/rpc/ws/blocks")
            sockets.append(stack.enter_context(connection))

        payloads = [block_at(height) for height in range(5)]
        for block in payloads:
            _publish("blocks", block)

        for socket in sockets:
            received = [socket.receive_json() for _ in range(len(payloads))]
            assert received == payloads

        # Publish another payload to ensure subscribers continue receiving in order.
        final_payload = {
            "height": 99,
            "hash": "0x" + "f" * 64,
            "parent_hash": "0x" + "e" * 64,
            "timestamp": "2025-01-01T00:01:39Z",
            "tx_count": 5,
        }
        _publish("blocks", final_payload)

        for socket in sockets:
            assert socket.receive_json() == final_payload
|
||||
|
||||
|
||||
def test_blocks_websocket_high_volume_load() -> None:
    """Four subscribers each receive all forty published blocks, in publish order."""
    message_count = 40
    subscriber_count = 4
    zero_hash = "0x" + "0" * 64

    with TestClient(create_app()) as client, ExitStack() as stack:
        # ExitStack closes every websocket when the block exits.
        sockets = []
        for _ in range(subscriber_count):
            connection = client.websocket_connect("/rpc/ws/blocks")
            sockets.append(stack.enter_context(connection))

        payloads = []
        for height in range(message_count):
            # The first block has no predecessor, so it points at the zero hash.
            if height > 0:
                parent_hash = "0x" + f"{height + 99:064x}"
            else:
                parent_hash = zero_hash
            block = {
                "height": height,
                "hash": "0x" + f"{height + 100:064x}",
                "parent_hash": parent_hash,
                "timestamp": f"2025-01-01T00:{height // 60:02d}:{height % 60:02d}Z",
                "tx_count": height % 7,
            }
            payloads.append(block)
            # Publish as we go, before any subscriber reads.
            _publish("blocks", block)

        for socket in sockets:
            received = [socket.receive_json() for _ in range(message_count)]
            assert received == payloads
|
||||
|
||||
|
||||
def test_transactions_websocket_cleans_up_on_disconnect() -> None:
    """A closed ``/rpc/ws/transactions`` subscriber must not break later publishes.

    The test connects one websocket, publishes a single transaction and
    checks it is delivered unchanged. It then leaves the ``with`` block,
    which closes the socket, and publishes again to confirm the call
    neither raises nor hangs.
    """
    client = TestClient(create_app())

    with client.websocket_connect("/rpc/ws/transactions") as websocket:
        # Each key appears exactly once; repeated keys in a dict literal
        # silently discard the earlier values.
        payload = {
            "tx_hash": "0x" + "b" * 64,
            "sender": "alice",
            "recipient": "carol",
            "payload": {"amount": 2},
            "nonce": 7,
            "fee": 1,
            "type": "TRANSFER",
        }
        _publish("transactions", payload)
        # Only one message was published, so read exactly once; a second
        # receive_json() would block waiting for a message that never comes.
        assert websocket.receive_json() == payload

    # After closing the websocket, publishing again should not raise and should not hang.
    _publish(
        "transactions",
        {
            "tx_hash": "0x" + "c" * 64,
            "sender": "alice",
            "recipient": "dave",
            "payload": {"amount": 3},
            "nonce": 8,
            "fee": 1,
            "type": "TRANSFER",
        },
    )
|
||||
|
||||
Reference in New Issue
Block a user