Some checks failed
CLI Tests / test-cli (push) Failing after 12s
Deploy to Testnet / deploy-testnet (push) Successful in 1m6s
Security Scanning / security-scan (push) Successful in 30s
Node Failover Simulation / failover-test (push) Successful in 4s
Multi-Node Stress Testing / stress-test (push) Successful in 4s
Cross-Node Transaction Testing / transaction-test (push) Successful in 2s
- Add output call to display campaign data when specific campaign_id is found - Add early return after outputting campaign to prevent further processing
516 lines
18 KiB
Python
Executable File
516 lines
18 KiB
Python
Executable File
"""Monitoring, metrics, and alerting commands for AITBC CLI"""
|
|
|
|
import click
|
|
from utils import output, error, success, console
|
|
import time
|
|
import httpx
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from datetime import datetime, timedelta
|
|
from aitbc_cli.config import get_config, CLIConfig
|
|
|
|
|
|
@click.group()
|
|
@click.pass_context
|
|
def monitor(ctx):
|
|
"""Monitoring, metrics, and alerting commands"""
|
|
# Initialize context object with config
|
|
if ctx.obj is None:
|
|
ctx.obj = {}
|
|
ctx.obj['config'] = get_config()
|
|
ctx.obj['output_format'] = ctx.obj.get('output_format', 'table')
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--refresh", type=int, default=5, help="Refresh interval in seconds")
|
|
@click.option("--duration", type=int, default=0, help="Duration in seconds (0 = indefinite)")
|
|
@click.pass_context
|
|
def dashboard(ctx, refresh: int, duration: int):
|
|
"""Real-time system dashboard"""
|
|
config = ctx.obj['config']
|
|
start_time = time.time()
|
|
|
|
try:
|
|
while True:
|
|
elapsed = time.time() - start_time
|
|
if duration > 0 and elapsed >= duration:
|
|
break
|
|
|
|
console.clear()
|
|
console.rule("[bold blue]AITBC Dashboard[/bold blue]")
|
|
console.print(f"[dim]Refreshing every {refresh}s | Elapsed: {int(elapsed)}s[/dim]\n")
|
|
|
|
# Fetch system dashboard
|
|
try:
|
|
with httpx.Client(timeout=5) as client:
|
|
# Get dashboard data
|
|
try:
|
|
url = f"{config.coordinator_url}/api/v1/dashboard"
|
|
resp = client.get(
|
|
url,
|
|
headers={"X-Api-Key": config.api_key or ""}
|
|
)
|
|
if resp.status_code == 200:
|
|
dashboard = resp.json()
|
|
console.print("[bold green]Dashboard Status:[/bold green] Online")
|
|
|
|
# Overall status
|
|
overall_status = dashboard.get("overall_status", "unknown")
|
|
console.print(f" Overall Status: {overall_status}")
|
|
|
|
# Services summary
|
|
services = dashboard.get("services", {})
|
|
console.print(f" Services: {len(services)}")
|
|
|
|
for service_name, service_data in services.items():
|
|
# Handle both string ("online") and dict ({"status": "online"}) formats
|
|
if isinstance(service_data, dict):
|
|
status = service_data.get("status", "unknown")
|
|
else:
|
|
status = service_data if service_data else "unknown"
|
|
console.print(f" {service_name}: {status}")
|
|
|
|
# Metrics summary
|
|
metrics = dashboard.get("metrics", {})
|
|
if metrics:
|
|
health_pct = metrics.get("health_percentage", 0)
|
|
console.print(f" Health: {health_pct:.1f}%")
|
|
|
|
else:
|
|
console.print(f"[bold yellow]Dashboard:[/bold yellow] HTTP {resp.status_code}")
|
|
except Exception as e:
|
|
console.print(f"[bold red]Dashboard:[/bold red] Error - {e}")
|
|
|
|
except Exception as e:
|
|
console.print(f"[red]Error fetching data: {e}[/red]")
|
|
|
|
console.print(f"\n[dim]Press Ctrl+C to exit[/dim]")
|
|
time.sleep(refresh)
|
|
|
|
except KeyboardInterrupt:
|
|
console.print("\n[bold]Dashboard stopped[/bold]")
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--period", default="24h", help="Time period (1h, 24h, 7d, 30d)")
|
|
@click.option("--export", "export_path", type=click.Path(), help="Export metrics to file")
|
|
@click.pass_context
|
|
def metrics(ctx, period: str, export_path: Optional[str]):
|
|
"""Collect and display system metrics"""
|
|
config = ctx.obj['config']
|
|
|
|
# Parse period
|
|
multipliers = {"h": 3600, "d": 86400}
|
|
unit = period[-1]
|
|
value = int(period[:-1])
|
|
seconds = value * multipliers.get(unit, 3600)
|
|
since = datetime.now() - timedelta(seconds=seconds)
|
|
|
|
metrics_data = {
|
|
"period": period,
|
|
"since": since.isoformat(),
|
|
"collected_at": datetime.now().isoformat(),
|
|
"coordinator": {},
|
|
"jobs": {},
|
|
"miners": {}
|
|
}
|
|
|
|
try:
|
|
with httpx.Client(timeout=10) as client:
|
|
# Coordinator metrics
|
|
try:
|
|
resp = client.get(
|
|
f"{config.coordinator_url}/status",
|
|
headers={"X-Api-Key": config.api_key or ""}
|
|
)
|
|
if resp.status_code == 200:
|
|
metrics_data["coordinator"] = resp.json()
|
|
metrics_data["coordinator"]["status"] = "online"
|
|
else:
|
|
metrics_data["coordinator"]["status"] = f"error_{resp.status_code}"
|
|
except Exception:
|
|
metrics_data["coordinator"]["status"] = "offline"
|
|
|
|
# Job metrics
|
|
try:
|
|
resp = client.get(
|
|
f"{config.coordinator_url}/jobs",
|
|
headers={"X-Api-Key": config.api_key or ""}
|
|
)
|
|
if resp.status_code == 200:
|
|
jobs = resp.json()
|
|
if isinstance(jobs, list):
|
|
metrics_data["jobs"] = {
|
|
"total": len(jobs),
|
|
"completed": sum(1 for j in jobs if j.get("status") == "completed"),
|
|
"pending": sum(1 for j in jobs if j.get("status") == "pending"),
|
|
"failed": sum(1 for j in jobs if j.get("status") == "failed"),
|
|
}
|
|
except Exception:
|
|
metrics_data["jobs"] = {"error": "unavailable"}
|
|
|
|
# Miner metrics
|
|
try:
|
|
resp = client.get(
|
|
f"{config.coordinator_url}/miners",
|
|
headers={"X-Api-Key": config.api_key or ""}
|
|
)
|
|
if resp.status_code == 200:
|
|
miners = resp.json()
|
|
if isinstance(miners, list):
|
|
metrics_data["miners"] = {
|
|
"total": len(miners),
|
|
"online": sum(1 for m in miners if m.get("status") == "ONLINE"),
|
|
"offline": sum(1 for m in miners if m.get("status") != "ONLINE"),
|
|
}
|
|
except Exception:
|
|
metrics_data["miners"] = {"error": "unavailable"}
|
|
|
|
except Exception as e:
|
|
error(f"Failed to collect metrics: {e}")
|
|
|
|
if export_path:
|
|
with open(export_path, "w") as f:
|
|
json.dump(metrics_data, f, indent=2)
|
|
success(f"Metrics exported to {export_path}")
|
|
|
|
output(metrics_data, ctx.obj['output_format'])
|
|
|
|
|
|
@monitor.command()
|
|
@click.argument("action", type=click.Choice(["add", "list", "remove", "test"]))
|
|
@click.option("--name", help="Alert name")
|
|
@click.option("--type", "alert_type", type=click.Choice(["coordinator_down", "miner_offline", "job_failed", "low_balance"]), help="Alert type")
|
|
@click.option("--threshold", type=float, help="Alert threshold value")
|
|
@click.option("--webhook", help="Webhook URL for notifications")
|
|
@click.pass_context
|
|
def alerts(ctx, action: str, name: Optional[str], alert_type: Optional[str],
|
|
threshold: Optional[float], webhook: Optional[str]):
|
|
"""Configure monitoring alerts"""
|
|
alerts_dir = Path.home() / ".aitbc" / "alerts"
|
|
alerts_dir.mkdir(parents=True, exist_ok=True)
|
|
alerts_file = alerts_dir / "alerts.json"
|
|
|
|
# Load existing alerts
|
|
existing = []
|
|
if alerts_file.exists():
|
|
with open(alerts_file) as f:
|
|
existing = json.load(f)
|
|
|
|
if action == "add":
|
|
if not name or not alert_type:
|
|
error("Alert name and type required (--name, --type)")
|
|
return
|
|
alert = {
|
|
"name": name,
|
|
"type": alert_type,
|
|
"threshold": threshold,
|
|
"webhook": webhook,
|
|
"created_at": datetime.now().isoformat(),
|
|
"enabled": True
|
|
}
|
|
existing.append(alert)
|
|
with open(alerts_file, "w") as f:
|
|
json.dump(existing, f, indent=2)
|
|
success(f"Alert '{name}' added")
|
|
output(alert, ctx.obj['output_format'])
|
|
|
|
elif action == "list":
|
|
if not existing:
|
|
output({"message": "No alerts configured"}, ctx.obj['output_format'])
|
|
else:
|
|
output(existing, ctx.obj['output_format'])
|
|
|
|
elif action == "remove":
|
|
if not name:
|
|
error("Alert name required (--name)")
|
|
return
|
|
existing = [a for a in existing if a["name"] != name]
|
|
with open(alerts_file, "w") as f:
|
|
json.dump(existing, f, indent=2)
|
|
success(f"Alert '{name}' removed")
|
|
|
|
elif action == "test":
|
|
if not name:
|
|
error("Alert name required (--name)")
|
|
return
|
|
alert = next((a for a in existing if a["name"] == name), None)
|
|
if not alert:
|
|
error(f"Alert '{name}' not found")
|
|
return
|
|
if alert.get("webhook"):
|
|
try:
|
|
with httpx.Client(timeout=10) as client:
|
|
resp = client.post(alert["webhook"], json={
|
|
"alert": name,
|
|
"type": alert["type"],
|
|
"message": f"Test alert from AITBC CLI",
|
|
"timestamp": datetime.now().isoformat()
|
|
})
|
|
output({"status": "sent", "response_code": resp.status_code}, ctx.obj['output_format'])
|
|
except Exception as e:
|
|
error(f"Webhook test failed: {e}")
|
|
else:
|
|
output({"status": "no_webhook", "alert": alert}, ctx.obj['output_format'])
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--period", default="7d", help="Analysis period (1d, 7d, 30d)")
|
|
@click.pass_context
|
|
def history(ctx, period: str):
|
|
"""Historical data analysis"""
|
|
config = ctx.obj['config']
|
|
|
|
multipliers = {"h": 3600, "d": 86400}
|
|
unit = period[-1]
|
|
value = int(period[:-1])
|
|
seconds = value * multipliers.get(unit, 3600)
|
|
since = datetime.now() - timedelta(seconds=seconds)
|
|
|
|
analysis = {
|
|
"period": period,
|
|
"since": since.isoformat(),
|
|
"analyzed_at": datetime.now().isoformat(),
|
|
"summary": {}
|
|
}
|
|
|
|
try:
|
|
with httpx.Client(timeout=10) as client:
|
|
try:
|
|
resp = client.get(
|
|
f"{config.coordinator_url}/jobs",
|
|
headers={"X-Api-Key": config.api_key or ""}
|
|
)
|
|
if resp.status_code == 200:
|
|
jobs = resp.json()
|
|
if isinstance(jobs, list):
|
|
completed = [j for j in jobs if j.get("status") == "completed"]
|
|
failed = [j for j in jobs if j.get("status") == "failed"]
|
|
analysis["summary"] = {
|
|
"total_jobs": len(jobs),
|
|
"completed": len(completed),
|
|
"failed": len(failed),
|
|
"success_rate": f"{len(completed) / max(1, len(jobs)) * 100:.1f}%",
|
|
}
|
|
except Exception:
|
|
analysis["summary"] = {"error": "Could not fetch job data"}
|
|
|
|
except Exception as e:
|
|
error(f"Analysis failed: {e}")
|
|
|
|
output(analysis, ctx.obj['output_format'])
|
|
|
|
|
|
@monitor.command()
|
|
@click.argument("action", type=click.Choice(["add", "list", "remove", "test"]))
|
|
@click.option("--name", help="Webhook name")
|
|
@click.option("--url", help="Webhook URL")
|
|
@click.option("--events", help="Comma-separated event types (job_completed,miner_offline,alert)")
|
|
@click.pass_context
|
|
def webhooks(ctx, action: str, name: Optional[str], url: Optional[str], events: Optional[str]):
|
|
"""Manage webhook notifications"""
|
|
webhooks_dir = Path.home() / ".aitbc" / "webhooks"
|
|
webhooks_dir.mkdir(parents=True, exist_ok=True)
|
|
webhooks_file = webhooks_dir / "webhooks.json"
|
|
|
|
existing = []
|
|
if webhooks_file.exists():
|
|
with open(webhooks_file) as f:
|
|
existing = json.load(f)
|
|
|
|
if action == "add":
|
|
if not name or not url:
|
|
error("Webhook name and URL required (--name, --url)")
|
|
return
|
|
webhook = {
|
|
"name": name,
|
|
"url": url,
|
|
"events": events.split(",") if events else ["all"],
|
|
"created_at": datetime.now().isoformat(),
|
|
"enabled": True
|
|
}
|
|
existing.append(webhook)
|
|
with open(webhooks_file, "w") as f:
|
|
json.dump(existing, f, indent=2)
|
|
success(f"Webhook '{name}' added")
|
|
output(webhook, ctx.obj['output_format'])
|
|
|
|
elif action == "list":
|
|
if not existing:
|
|
output({"message": "No webhooks configured"}, ctx.obj['output_format'])
|
|
else:
|
|
output(existing, ctx.obj['output_format'])
|
|
|
|
elif action == "remove":
|
|
if not name:
|
|
error("Webhook name required (--name)")
|
|
return
|
|
existing = [w for w in existing if w["name"] != name]
|
|
with open(webhooks_file, "w") as f:
|
|
json.dump(existing, f, indent=2)
|
|
success(f"Webhook '{name}' removed")
|
|
|
|
elif action == "test":
|
|
if not name:
|
|
error("Webhook name required (--name)")
|
|
return
|
|
wh = next((w for w in existing if w["name"] == name), None)
|
|
if not wh:
|
|
error(f"Webhook '{name}' not found")
|
|
return
|
|
try:
|
|
with httpx.Client(timeout=10) as client:
|
|
resp = client.post(wh["url"], json={
|
|
"event": "test",
|
|
"source": "aitbc-cli",
|
|
"message": "Test webhook notification",
|
|
"timestamp": datetime.now().isoformat()
|
|
})
|
|
output({"status": "sent", "response_code": resp.status_code}, ctx.obj['output_format'])
|
|
except Exception as e:
|
|
error(f"Webhook test failed: {e}")
|
|
|
|
|
|
CAMPAIGNS_DIR = Path.home() / ".aitbc" / "campaigns"
|
|
|
|
|
|
def _ensure_campaigns():
|
|
CAMPAIGNS_DIR.mkdir(parents=True, exist_ok=True)
|
|
campaigns_file = CAMPAIGNS_DIR / "campaigns.json"
|
|
if not campaigns_file.exists() or campaigns_file.stat().st_size == 0:
|
|
# Seed with default campaigns
|
|
default = {"campaigns": [
|
|
{
|
|
"id": "staking_launch",
|
|
"name": "Staking Launch Campaign",
|
|
"type": "staking",
|
|
"apy_boost": 2.0,
|
|
"start_date": "2026-02-01T00:00:00",
|
|
"end_date": "2026-04-01T00:00:00",
|
|
"status": "active",
|
|
"total_staked": 0,
|
|
"participants": 0,
|
|
"rewards_distributed": 0
|
|
},
|
|
{
|
|
"id": "liquidity_mining_q1",
|
|
"name": "Q1 Liquidity Mining",
|
|
"type": "liquidity",
|
|
"apy_boost": 3.0,
|
|
"start_date": "2026-01-15T00:00:00",
|
|
"end_date": "2026-03-15T00:00:00",
|
|
"status": "active",
|
|
"total_staked": 0,
|
|
"participants": 0,
|
|
"rewards_distributed": 0
|
|
}
|
|
]}
|
|
with open(campaigns_file, "w") as f:
|
|
json.dump(default, f, indent=2)
|
|
return campaigns_file
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--status", type=click.Choice(["active", "ended", "all"]), default="all", help="Filter by status")
|
|
@click.pass_context
|
|
def campaigns(ctx, status: str):
|
|
"""List active incentive campaigns"""
|
|
campaigns_file = _ensure_campaigns()
|
|
try:
|
|
with open(campaigns_file) as f:
|
|
data = json.load(f)
|
|
except (json.JSONDecodeError, IOError):
|
|
# File is empty or invalid - recreate it
|
|
campaigns_file = _ensure_campaigns()
|
|
with open(campaigns_file) as f:
|
|
data = json.load(f)
|
|
|
|
campaign_list = data.get("campaigns", [])
|
|
|
|
# Auto-update status
|
|
now = datetime.now()
|
|
for c in campaign_list:
|
|
end = datetime.fromisoformat(c["end_date"])
|
|
if now > end and c["status"] == "active":
|
|
c["status"] = "ended"
|
|
with open(campaigns_file, "w") as f:
|
|
json.dump(data, f, indent=2)
|
|
|
|
if status != "all":
|
|
campaign_list = [c for c in campaign_list if c["status"] == status]
|
|
|
|
if not campaign_list:
|
|
output({"message": "No campaigns found"}, ctx.obj['output_format'])
|
|
return
|
|
|
|
output(campaign_list, ctx.obj['output_format'])
|
|
|
|
|
|
@monitor.command(name="campaign-stats")
|
|
@click.argument("campaign_id", required=False)
|
|
@click.pass_context
|
|
def campaign_stats(ctx, campaign_id: Optional[str]):
|
|
"""Campaign performance metrics (TVL, participants, rewards)"""
|
|
campaigns_file = _ensure_campaigns()
|
|
try:
|
|
with open(campaigns_file) as f:
|
|
data = json.load(f)
|
|
except (json.JSONDecodeError, IOError):
|
|
# File is empty or invalid - recreate it
|
|
campaigns_file = _ensure_campaigns()
|
|
with open(campaigns_file) as f:
|
|
data = json.load(f)
|
|
|
|
campaign_list = data.get("campaigns", [])
|
|
|
|
if campaign_id:
|
|
campaign = next((c for c in campaign_list if c["id"] == campaign_id), None)
|
|
if not campaign:
|
|
error(f"Campaign '{campaign_id}' not found")
|
|
ctx.exit(1)
|
|
output(campaign, ctx.obj['output_format'])
|
|
return
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--service", help="Service to start")
|
|
def start(service: str):
|
|
"""Start monitoring service"""
|
|
output({
|
|
"service": service or "all",
|
|
"status": "started"
|
|
})
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--service", help="Service to stop")
|
|
def stop(service: str):
|
|
"""Stop monitoring service"""
|
|
output({
|
|
"service": service or "all",
|
|
"status": "stopped"
|
|
})
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--service", help="Service to check")
|
|
def status(service: str):
|
|
"""Get monitoring service status"""
|
|
output({
|
|
"service": service or "all",
|
|
"status": "running",
|
|
"uptime": "0:00:00"
|
|
})
|
|
|
|
|
|
@monitor.command()
|
|
@click.option("--severity", help="Filter by severity")
|
|
def alerts(severity: str):
|
|
"""List monitoring alerts"""
|
|
output({
|
|
"alerts": [],
|
|
"severity": severity or "all"
|
|
})
|
|
|