Based on the repository's commit message style and the changes in the diff, here's an appropriate commit message:

```
feat: add websocket tests, PoA metrics, marketplace endpoints, and enhanced observability

- Add comprehensive websocket tests for blocks and transactions streams including multi-subscriber and high-volume scenarios
- Extend PoA consensus with per-proposer block metrics and rotation tracking
- Add latest block interval gauge and RPC error spike alerting
- Enhance mock coordinator with continuous job and miner-metric simulation
This commit is contained in:
oib
2025-12-22 07:55:09 +01:00
parent b8b640666d
commit fa5a6fddf3
70 changed files with 3472 additions and 246 deletions

View File

@@ -0,0 +1,23 @@
{
"accounts": [
{
"address": "ait1faucet000000000000000000000000000000000",
"balance": 1000000000,
"nonce": 0
}
],
"authorities": [
{
"address": "ait1devproposer000000000000000000000000000000",
"weight": 1
}
],
"chain_id": "ait-devnet",
"params": {
"base_fee": 10,
"coordinator_ratio": 0.05,
"fee_per_byte": 1,
"mint_per_unit": 1000
},
"timestamp": 1766383019
}

View File

@@ -8,7 +8,7 @@ This directory contains Prometheus and Grafana assets for the devnet environment
## Files
- `prometheus.yml` Scrapes both blockchain node and mock coordinator/miner metrics.
- `grafana-dashboard.json` Panels for block interval, RPC throughput, miner activity, coordinator receipt flow, **plus new gossip queue, subscriber, and publication rate panels**.
- `grafana-dashboard.json` Panels for block interval (including latest interval gauge), RPC throughput, miner activity, coordinator receipt flow, gossip queue/subscriber/publication metrics, and PoA proposer visibility (rotation counts, blocks proposed per proposer).
- `alerts.yml` Alertmanager rules highlighting proposer stalls, miner errors, and coordinator receipt drop-offs.
- `gossip-recording-rules.yml` Prometheus recording rules that derive queue/subscriber gauges and publication rates from gossip metrics.

View File

@@ -41,3 +41,13 @@ groups:
summary: "No receipts attested in 5 minutes"
description: |
Receipt attestations ceased during the last five minutes. Inspect coordinator connectivity.
- alert: RpcErrorsSpiking
expr: increase(rpc_request_failures_total[5m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: "RPC error spike detected"
description: |
RPC request failures have increased during the last five minutes. Investigate rpc_request_failures_total for details.

View File

@@ -0,0 +1,255 @@
{
"annotations": {
"list": []
},
"editable": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "1 / rate(blockchain_block_height[1m])",
"refId": "A"
}
],
"title": "Block Production Interval (seconds)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(mempool_queue_depth[1m])",
"refId": "A"
}
],
"title": "Mempool Queue Depth",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(miner_queue_depth[1m])",
"refId": "A"
}
],
"title": "Miner Queue Depth",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "avg_over_time(miner_job_duration_seconds_sum[1m]) / avg_over_time(miner_job_duration_seconds_count[1m])",
"refId": "A"
}
],
"title": "Miner Job Duration Seconds",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"aitbc",
"blockchain"
],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "AITBC Blockchain Node",
"uid": "aitbc-node",
"version": 1
}

View File

@@ -0,0 +1,322 @@
{
"annotations": {
"list": []
},
"editable": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_submitted_total[1m])",
"refId": "A"
}
],
"title": "Jobs Submitted",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_completed_total[1m])",
"refId": "A"
}
],
"title": "Jobs Completed",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "rate(coordinator_jobs_failed_total[1m])",
"refId": "A"
}
],
"title": "Jobs Failed",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 5
},
{
"color": "red",
"value": 10
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "miner_active_jobs",
"refId": "A"
}
],
"title": "Active Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 5
},
{
"color": "red",
"value": 10
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 8
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"expr": "miner_error_rate",
"refId": "A"
}
],
"title": "Miner Error Rate",
"type": "stat"
}
],
"refresh": "10s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"aitbc",
"coordinator"
],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "AITBC Coordinator Overview",
"uid": "aitbc-coordinator",
"version": 1
}

View File

@@ -352,6 +352,80 @@
],
"title": "Gossip Publication Rate by Topic",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PROMETHEUS_DS"
},
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"id": 9,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "increase(poa_proposer_rotations_total[30m])",
"legendFormat": "rotations (30m)",
"refId": "A"
}
],
"title": "Proposer Rotation Count",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PROMETHEUS_DS"
},
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 32
},
"id": 10,
"options": {
"legend": {
"calcs": ["lastNotNull"],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "label_replace(sum(rate({__name__=~\"poa_blocks_proposed_total_.*\"}[5m])) by (__name__), \"proposer\", \"$1\", \"__name__\", \"poa_blocks_proposed_total_(.*)\")",
"legendFormat": "{{proposer}}",
"refId": "A"
}
],
"title": "Blocks Proposed per Proposer (5m rate)",
"type": "timeseries"
}
],
"refresh": "10s",

0
apps/blockchain-node/scripts/devnet_up.sh Normal file → Executable file
View File

View File

@@ -3,9 +3,11 @@
from __future__ import annotations
import asyncio
import contextlib
import random
import time
from typing import Dict
from collections import deque
from typing import Deque, Dict, List
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
@@ -14,21 +16,78 @@ from aitbc_chain.metrics import metrics_registry
app = FastAPI(title="Mock Coordinator API", version="0.1.0")
SIMULATED_MINERS: List[str] = ["miner-alpha", "miner-beta", "miner-gamma"]
SIMULATED_CLIENTS: List[str] = ["client-labs", "client-trading", "client-research"]
MOCK_JOBS: Dict[str, Dict[str, str]] = {
"job_1": {"status": "complete", "price": "50000", "compute_units": 2500},
"job_2": {"status": "complete", "price": "25000", "compute_units": 1200},
}
_simulation_task: asyncio.Task | None = None
_job_rollup: Deque[str] = deque(maxlen=120)
def _simulate_miner_metrics() -> None:
metrics_registry.set_gauge("miner_active_jobs", float(random.randint(0, 5)))
active_jobs = random.randint(1, 6)
metrics_registry.set_gauge("miner_active_jobs", float(active_jobs))
metrics_registry.set_gauge("miner_error_rate", float(random.randint(0, 1)))
metrics_registry.observe("miner_job_duration_seconds", random.uniform(1.0, 5.0))
metrics_registry.observe("miner_job_duration_seconds", random.uniform(1.5, 8.0))
metrics_registry.observe("miner_queue_depth", float(random.randint(0, 12)))
async def _simulation_loop() -> None:
job_counter = 3
while True:
_simulate_miner_metrics()
job_id = f"job_{job_counter}"
client = random.choice(SIMULATED_CLIENTS)
miner = random.choice(SIMULATED_MINERS)
price = random.randint(15_000, 75_000)
compute_units = random.randint(750, 5000)
MOCK_JOBS[job_id] = {
"status": random.choice(["complete", "pending", "failed"]),
"price": str(price),
"compute_units": compute_units,
"client": client,
"assigned_miner": miner,
}
_job_rollup.append(job_id)
if len(MOCK_JOBS) > _job_rollup.maxlen:
oldest = _job_rollup.popleft()
MOCK_JOBS.pop(oldest, None)
metrics_registry.increment("coordinator_jobs_submitted_total")
metrics_registry.observe("coordinator_job_price", float(price))
metrics_registry.observe("coordinator_job_compute_units", float(compute_units))
if MOCK_JOBS[job_id]["status"] == "failed":
metrics_registry.increment("coordinator_jobs_failed_total")
else:
metrics_registry.increment("coordinator_jobs_completed_total")
job_counter += 1
await asyncio.sleep(random.uniform(1.5, 3.5))
@app.on_event("startup")
async def _startup() -> None:
global _simulation_task
_simulate_miner_metrics()
_simulation_task = asyncio.create_task(_simulation_loop())
@app.on_event("shutdown")
async def _shutdown() -> None:
global _simulation_task
if _simulation_task:
_simulation_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await _simulation_task
_simulation_task = None
@app.get("/health")

View File

@@ -4,12 +4,21 @@ import asyncio
import hashlib
from dataclasses import dataclass
from datetime import datetime
import re
from typing import Callable, ContextManager, Optional
from sqlmodel import Session, select
from ..logging import get_logger
from ..metrics import metrics_registry
_METRIC_KEY_SANITIZE = re.compile(r"[^0-9a-zA-Z]+")
def _sanitize_metric_suffix(value: str) -> str:
sanitized = _METRIC_KEY_SANITIZE.sub("_", value).strip("_")
return sanitized or "unknown"
from ..models import Block
from ..gossip import gossip_broker
@@ -33,6 +42,7 @@ class PoAProposer:
self._logger = get_logger(__name__)
self._stop_event = asyncio.Event()
self._task: Optional[asyncio.Task[None]] = None
self._last_proposer_id: Optional[str] = None
async def start(self) -> None:
if self._task is not None:
@@ -104,6 +114,13 @@ class PoAProposer:
metrics_registry.set_gauge("chain_head_height", float(next_height))
if interval_seconds is not None and interval_seconds >= 0:
metrics_registry.observe("block_interval_seconds", interval_seconds)
metrics_registry.set_gauge("poa_last_block_interval_seconds", float(interval_seconds))
proposer_suffix = _sanitize_metric_suffix(self._config.proposer_id)
metrics_registry.increment(f"poa_blocks_proposed_total_{proposer_suffix}")
if self._last_proposer_id is not None and self._last_proposer_id != self._config.proposer_id:
metrics_registry.increment("poa_proposer_rotations_total")
self._last_proposer_id = self._config.proposer_id
asyncio.create_task(
gossip_broker.publish(

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from datetime import datetime
import re
from typing import List, Optional
from typing import Optional
from pydantic import field_validator
from sqlalchemy import Column
@@ -34,8 +34,8 @@ class Block(SQLModel, table=True):
tx_count: int = 0
state_root: Optional[str] = None
transactions: List["Transaction"] = Relationship(back_populates="block")
receipts: List["Receipt"] = Relationship(back_populates="block")
transactions: list["Transaction"] = Relationship(back_populates="block")
receipts: list["Receipt"] = Relationship(back_populates="block")
@field_validator("hash", mode="before")
@classmethod
@@ -69,7 +69,7 @@ class Transaction(SQLModel, table=True):
)
created_at: datetime = Field(default_factory=datetime.utcnow, index=True)
block: Optional[Block] = Relationship(back_populates="transactions")
block: Optional["Block"] = Relationship(back_populates="transactions")
@field_validator("tx_hash", mode="before")
@classmethod
@@ -101,7 +101,7 @@ class Receipt(SQLModel, table=True):
minted_amount: Optional[int] = None
recorded_at: datetime = Field(default_factory=datetime.utcnow, index=True)
block: Optional[Block] = Relationship(back_populates="receipts")
block: Optional["Block"] = Relationship(back_populates="receipts")
@field_validator("receipt_id", mode="before")
@classmethod

View File

@@ -0,0 +1,9 @@
"""Observability tooling for the AITBC blockchain node."""
from .dashboards import generate_default_dashboards
from .exporters import register_exporters
__all__ = [
"generate_default_dashboards",
"register_exporters",
]

View File

@@ -0,0 +1,267 @@
"""Generate Grafana dashboards for the devnet observability stack."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Dict, Iterable
def _timeseries_panel(
panel_id: int,
title: str,
expr: str,
grid_x: int,
grid_y: int,
datasource_uid: str,
) -> Dict[str, object]:
return {
"datasource": {"type": "prometheus", "uid": datasource_uid},
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "red", "value": 80},
],
},
},
"overrides": [],
},
"gridPos": {"h": 8, "w": 12, "x": grid_x, "y": grid_y},
"id": panel_id,
"options": {
"legend": {"displayMode": "list", "placement": "bottom"},
"tooltip": {"mode": "multi", "sort": "none"},
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": datasource_uid},
"expr": expr,
"refId": "A",
}
],
"title": title,
"type": "timeseries",
}
def _stat_panel(
panel_id: int,
title: str,
expr: str,
grid_x: int,
grid_y: int,
datasource_uid: str,
) -> Dict[str, object]:
return {
"datasource": {"type": "prometheus", "uid": datasource_uid},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "orange", "value": 5},
{"color": "red", "value": 10},
],
},
},
"overrides": [],
},
"gridPos": {"h": 4, "w": 6, "x": grid_x, "y": grid_y},
"id": panel_id,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
"textMode": "auto",
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": datasource_uid},
"expr": expr,
"refId": "A",
}
],
"title": title,
"type": "stat",
}
def _coordinator_dashboard(datasource_uid: str) -> Dict[str, object]:
    """Assemble the coordinator overview dashboard (job flow plus miner stats).

    Panels are declared as (id, title, expr, x, y) specs and expanded through
    the shared panel builders so all panels keep a uniform shape.
    """
    timeseries_specs = [
        (1, "Jobs Submitted", "rate(coordinator_jobs_submitted_total[1m])", 0, 0),
        (2, "Jobs Completed", "rate(coordinator_jobs_completed_total[1m])", 12, 0),
        (3, "Jobs Failed", "rate(coordinator_jobs_failed_total[1m])", 0, 8),
        (6, "Average Bid Price", "avg_over_time(coordinator_job_price[5m])", 12, 8),
    ]
    stat_specs = [
        (4, "Active Jobs", "miner_active_jobs", 0, 16),
        (5, "Miner Error Rate", "miner_error_rate", 6, 16),
        (7, "Avg Compute Units", "avg_over_time(coordinator_job_compute_units[5m])", 12, 16),
    ]
    panels = [
        _timeseries_panel(pid, title, expr, x, y, datasource_uid)
        for pid, title, expr, x, y in timeseries_specs
    ]
    panels.extend(
        _stat_panel(pid, title, expr, x, y, datasource_uid)
        for pid, title, expr, x, y in stat_specs
    )
    return {
        "uid": "aitbc-coordinator",
        "title": "AITBC Coordinator Overview",
        "editable": True,
        "tags": ["aitbc", "coordinator"],
        "timezone": "",
        "schemaVersion": 38,
        "version": 1,
        "refresh": "10s",
        "style": "dark",
        "annotations": {"list": []},
        "templating": {"list": []},
        "time": {"from": "now-5m", "to": "now"},
        "timepicker": {},
        "panels": panels,
    }
def _node_dashboard(datasource_uid: str) -> Dict[str, object]:
    """Assemble the blockchain-node overview dashboard.

    Covers block production interval, mempool/miner queue depth, PoA proposer
    rotations, miner job duration, and RPC p95 latency. Panels are declared as
    (id, title, expr, x, y) specs and expanded through the shared builder.
    """
    panel_specs = [
        # 1/rate(height) converts blocks-per-second into seconds-per-block.
        (1, "Block Production Interval (seconds)", "1 / rate(blockchain_block_height[1m])", 0, 0),
        (2, "Mempool Queue Depth", "avg_over_time(mempool_queue_depth[1m])", 12, 0),
        (5, "Proposer Rotation Count", "increase(poa_proposer_rotations_total[5m])", 0, 8),
        (3, "Miner Queue Depth", "avg_over_time(miner_queue_depth[1m])", 12, 8),
        (
            4,
            "Miner Job Duration Seconds",
            "avg_over_time(miner_job_duration_seconds_sum[1m]) / avg_over_time(miner_job_duration_seconds_count[1m])",
            0,
            16,
        ),
        (
            6,
            "RPC 95th Percentile Latency",
            "histogram_quantile(0.95, sum(rate(rpc_request_duration_seconds_bucket[5m])) by (le))",
            12,
            16,
        ),
    ]
    return {
        "uid": "aitbc-node",
        "title": "AITBC Blockchain Node",
        "editable": True,
        "tags": ["aitbc", "blockchain"],
        "timezone": "",
        "schemaVersion": 38,
        "version": 1,
        "refresh": "10s",
        "style": "dark",
        "annotations": {"list": []},
        "templating": {"list": []},
        "time": {"from": "now-5m", "to": "now"},
        "timepicker": {},
        "panels": [
            _timeseries_panel(pid, title, expr, x, y, datasource_uid)
            for pid, title, expr, x, y in panel_specs
        ],
    }
def _dashboard_payloads(datasource_uid: str) -> Iterable[tuple[str, Dict[str, object]]]:
    """Return (filename, dashboard-dict) pairs for every generated dashboard."""
    builders = {
        "coordinator-overview.json": _coordinator_dashboard,
        "blockchain-node-overview.json": _node_dashboard,
    }
    return tuple((name, build(datasource_uid)) for name, build in builders.items())
def generate_default_dashboards(output_dir: Path, datasource_uid: str = "${DS_PROMETHEUS}") -> None:
    """Write Grafana dashboard JSON exports to ``output_dir``.

    Parameters
    ----------
    output_dir:
        Destination directory for the generated JSON files; created
        (including parents) when missing.
    datasource_uid:
        Grafana datasource UID substituted into every panel query
        (defaults to the built-in "${DS_PROMETHEUS}" variable).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for filename, payload in _dashboard_payloads(datasource_uid):
        target = output_dir / filename
        # sort_keys keeps exports byte-stable across runs for clean diffs.
        with target.open("w", encoding="utf-8") as handle:
            handle.write(json.dumps(payload, indent=2, sort_keys=True))

View File

@@ -0,0 +1,17 @@
"""Placeholder exporter registration for metrics/log sinks."""
from __future__ import annotations
from typing import Iterable
# Module-level, append-only record of every exporter name requested so far.
REGISTERED_EXPORTERS: list[str] = []


def register_exporters(exporters: Iterable[str]) -> None:
    """Record exporter names for observability pipelines.

    A full implementation would wire up Prometheus registries, log shippers,
    or tracing exporters; this placeholder only appends the requested names
    to ``REGISTERED_EXPORTERS`` so callers can inspect what was asked for.
    """
    for exporter_name in exporters:
        REGISTERED_EXPORTERS.append(exporter_name)

View File

@@ -1,8 +1,15 @@
from __future__ import annotations
import sys
from pathlib import Path
import pytest
from sqlmodel import SQLModel, Session, create_engine
PROJECT_ROOT = Path(__file__).resolve().parent.parent / "src"
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from aitbc_chain.models import Block, Transaction, Receipt # noqa: F401 - ensure models imported for metadata

View File

@@ -0,0 +1,39 @@
"""Tests for the observability dashboard helpers."""
from __future__ import annotations
import json
from pathlib import Path
from aitbc_chain.observability.dashboards import generate_default_dashboards
from aitbc_chain.observability import exporters
def test_generate_default_dashboards_creates_files(tmp_path: Path) -> None:
    """Dashboards are written to disk with the expected filenames and shape."""
    target_dir = tmp_path / "dashboards"
    generate_default_dashboards(target_dir, datasource_uid="prometheus")

    produced = {path.name for path in target_dir.glob("*.json")}
    assert produced == {
        "blockchain-node-overview.json",
        "coordinator-overview.json",
    }

    known_uids = {"aitbc-coordinator", "aitbc-node"}
    for dashboard_file in target_dir.glob("*.json"):
        with dashboard_file.open("r", encoding="utf-8") as handle:
            payload = json.load(handle)
        assert payload["uid"] in known_uids
        assert payload["title"].startswith("AITBC")
        assert payload["panels"], "Dashboard should contain at least one panel"
def test_register_exporters_tracks_names() -> None:
    """register_exporters appends the given names to the module registry in order."""
    exporters.REGISTERED_EXPORTERS.clear()
    requested = ["prometheus", "loki"]
    exporters.register_exporters(requested)
    assert exporters.REGISTERED_EXPORTERS == requested

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import asyncio
from contextlib import ExitStack
from fastapi.testclient import TestClient
@@ -10,8 +11,6 @@ from aitbc_chain.gossip import gossip_broker
def _publish(topic: str, message: dict) -> None:
asyncio.run(gossip_broker.publish(topic, message))
def test_blocks_websocket_stream() -> None:
client = TestClient(create_app())
@@ -28,19 +27,100 @@ def test_blocks_websocket_stream() -> None:
assert message == payload
def test_transactions_websocket_stream() -> None:
def test_blocks_websocket_multiple_subscribers_receive_all_payloads() -> None:
with TestClient(create_app()) as client, ExitStack() as stack:
sockets = [
stack.enter_context(client.websocket_connect("/rpc/ws/blocks"))
for _ in range(3)
]
payloads = [
{
"height": height,
"hash": "0x" + f"{height:064x}",
"parent_hash": (
"0x" + f"{height - 1:064x}" if height > 0 else "0x" + "0" * 64
),
"timestamp": f"2025-01-01T00:00:{height:02d}Z",
"tx_count": height % 3,
}
for height in range(5)
]
for payload in payloads:
_publish("blocks", payload)
for socket in sockets:
received = [socket.receive_json() for _ in payloads]
assert received == payloads
# Publish another payload to ensure subscribers continue receiving in order.
final_payload = {
"height": 99,
"hash": "0x" + "f" * 64,
"parent_hash": "0x" + "e" * 64,
"timestamp": "2025-01-01T00:01:39Z",
"tx_count": 5,
}
_publish("blocks", final_payload)
for socket in sockets:
assert socket.receive_json() == final_payload
def test_blocks_websocket_high_volume_load() -> None:
message_count = 40
subscriber_count = 4
with TestClient(create_app()) as client, ExitStack() as stack:
sockets = [
stack.enter_context(client.websocket_connect("/rpc/ws/blocks"))
for _ in range(subscriber_count)
]
payloads = []
for height in range(message_count):
payload = {
"height": height,
"hash": "0x" + f"{height + 100:064x}",
"parent_hash": "0x" + f"{height + 99:064x}" if height > 0 else "0x" + "0" * 64,
"timestamp": f"2025-01-01T00:{height // 60:02d}:{height % 60:02d}Z",
"tx_count": height % 7,
}
payloads.append(payload)
_publish("blocks", payload)
for socket in sockets:
received = [socket.receive_json() for _ in payloads]
assert received == payloads
def test_transactions_websocket_cleans_up_on_disconnect() -> None:
client = TestClient(create_app())
with client.websocket_connect("/rpc/ws/transactions") as websocket:
payload = {
"tx_hash": "0x" + "a" * 64,
"tx_hash": "0x" + "b" * 64,
"sender": "alice",
"recipient": "bob",
"payload": {"amount": 1},
"nonce": 1,
"fee": 0,
"recipient": "carol",
"payload": {"amount": 2},
"nonce": 7,
"fee": 1,
"type": "TRANSFER",
}
_publish("transactions", payload)
message = websocket.receive_json()
assert message == payload
assert websocket.receive_json() == payload
# After closing the websocket, publishing again should not raise and should not hang.
_publish(
"transactions",
{
"tx_hash": "0x" + "c" * 64,
"sender": "alice",
"recipient": "dave",
"payload": {"amount": 3},
"nonce": 8,
"fee": 1,
"type": "TRANSFER",
},
)