Some checks failed
API Endpoint Tests / test-api-endpoints (push) Successful in 10s
Blockchain Synchronization Verification / sync-verification (push) Failing after 3s
CLI Tests / test-cli (push) Failing after 4s
Documentation Validation / validate-docs (push) Successful in 8s
Documentation Validation / validate-policies-strict (push) Successful in 4s
Integration Tests / test-service-integration (push) Successful in 38s
Multi-Node Blockchain Health Monitoring / health-check (push) Successful in 2s
P2P Network Verification / p2p-verification (push) Successful in 3s
Security Scanning / security-scan (push) Successful in 40s
Smart Contract Tests / test-solidity (map[name:aitbc-token path:packages/solidity/aitbc-token]) (push) Successful in 15s
Smart Contract Tests / lint-solidity (push) Successful in 8s
- Relocate blockchain-event-bridge README content to docs/apps/blockchain/blockchain-event-bridge.md - Relocate blockchain-explorer README content to docs/apps/blockchain/blockchain-explorer.md - Replace app READMEs with redirect notices pointing to new documentation location - Consolidate documentation in central docs/ directory for better organization
487 lines
19 KiB
Python
487 lines
19 KiB
Python
"""
|
|
Pool Hub CLI Commands for AITBC
|
|
Commands for SLA monitoring, capacity planning, and billing integration
|
|
"""
|
|
|
|
import click
|
|
import json
|
|
import requests
|
|
from datetime import datetime
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
@click.group()
|
|
def pool_hub():
|
|
"""Pool hub management commands for SLA monitoring and billing"""
|
|
pass
|
|
|
|
@pool_hub.command()
|
|
@click.argument('miner_id', required=False)
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def sla_metrics(miner_id, test_mode):
|
|
"""Get SLA metrics for a miner or all miners"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
if miner_id:
|
|
mock_metrics = {
|
|
"miner_id": miner_id,
|
|
"uptime_percentage": 97.5,
|
|
"response_time_ms": 850,
|
|
"job_completion_rate": 92.3,
|
|
"capacity_availability": 85.0,
|
|
"thresholds": {
|
|
"uptime": 95.0,
|
|
"response_time": 1000,
|
|
"completion_rate": 90.0,
|
|
"capacity": 80.0
|
|
},
|
|
"violations": [
|
|
{
|
|
"type": "response_time",
|
|
"threshold": 1000,
|
|
"actual": 1200,
|
|
"timestamp": "2024-03-15T14:30:00Z"
|
|
}
|
|
]
|
|
}
|
|
|
|
click.echo(f"📊 SLA Metrics for {miner_id}:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"⏱️ Uptime: {mock_metrics['uptime_percentage']}% (threshold: {mock_metrics['thresholds']['uptime']}%)")
|
|
click.echo(f"⚡ Response Time: {mock_metrics['response_time_ms']}ms (threshold: {mock_metrics['thresholds']['response_time']}ms)")
|
|
click.echo(f"✅ Job Completion Rate: {mock_metrics['job_completion_rate']}% (threshold: {mock_metrics['thresholds']['completion_rate']}%)")
|
|
click.echo(f"📦 Capacity Availability: {mock_metrics['capacity_availability']}% (threshold: {mock_metrics['thresholds']['capacity']}%)")
|
|
|
|
if mock_metrics['violations']:
|
|
click.echo("")
|
|
click.echo("⚠️ Violations:")
|
|
for v in mock_metrics['violations']:
|
|
click.echo(f" {v['type']}: {v['actual']} vs threshold {v['threshold']} at {v['timestamp']}")
|
|
else:
|
|
mock_metrics = {
|
|
"total_miners": 45,
|
|
"average_uptime": 96.2,
|
|
"average_response_time": 780,
|
|
"average_completion_rate": 94.1,
|
|
"average_capacity": 88.5,
|
|
"miners_below_threshold": 3
|
|
}
|
|
|
|
click.echo("📊 SLA Metrics (All Miners):")
|
|
click.echo("=" * 50)
|
|
click.echo(f"👥 Total Miners: {mock_metrics['total_miners']}")
|
|
click.echo(f"⏱️ Average Uptime: {mock_metrics['average_uptime']}%")
|
|
click.echo(f"⚡ Average Response Time: {mock_metrics['average_response_time']}ms")
|
|
click.echo(f"✅ Average Completion Rate: {mock_metrics['average_completion_rate']}%")
|
|
click.echo(f"📦 Average Capacity: {mock_metrics['average_capacity']}%")
|
|
click.echo(f"⚠️ Miners Below Threshold: {mock_metrics['miners_below_threshold']}")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
|
|
if miner_id:
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/metrics/{miner_id}",
|
|
timeout=30
|
|
)
|
|
else:
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/metrics",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
metrics = response.json()
|
|
|
|
if miner_id:
|
|
click.echo(f"📊 SLA Metrics for {miner_id}:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"⏱️ Uptime: {metrics.get('uptime_percentage', 0)}%")
|
|
click.echo(f"⚡ Response Time: {metrics.get('response_time_ms', 0)}ms")
|
|
click.echo(f"✅ Job Completion Rate: {metrics.get('job_completion_rate', 0)}%")
|
|
click.echo(f"📦 Capacity Availability: {metrics.get('capacity_availability', 0)}%")
|
|
else:
|
|
click.echo("📊 SLA Metrics (All Miners):")
|
|
click.echo("=" * 50)
|
|
click.echo(f"👥 Total Miners: {metrics.get('total_miners', 0)}")
|
|
click.echo(f"⏱️ Average Uptime: {metrics.get('average_uptime', 0)}%")
|
|
click.echo(f"⚡ Average Response Time: {metrics.get('average_response_time', 0)}ms")
|
|
click.echo(f"✅ Average Completion Rate: {metrics.get('average_completion_rate', 0)}%")
|
|
else:
|
|
click.echo(f"❌ Failed to get SLA metrics: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting SLA metrics: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def sla_violations(test_mode):
|
|
"""Get SLA violations across all miners"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
mock_violations = [
|
|
{
|
|
"miner_id": "miner_001",
|
|
"type": "response_time",
|
|
"threshold": 1000,
|
|
"actual": 1200,
|
|
"timestamp": "2024-03-15T14:30:00Z"
|
|
},
|
|
{
|
|
"miner_id": "miner_002",
|
|
"type": "uptime",
|
|
"threshold": 95.0,
|
|
"actual": 92.5,
|
|
"timestamp": "2024-03-15T13:45:00Z"
|
|
}
|
|
]
|
|
|
|
click.echo("⚠️ SLA Violations:")
|
|
click.echo("=" * 50)
|
|
for v in mock_violations:
|
|
click.echo(f"👤 Miner: {v['miner_id']}")
|
|
click.echo(f" Type: {v['type']}")
|
|
click.echo(f" Threshold: {v['threshold']}")
|
|
click.echo(f" Actual: {v['actual']}")
|
|
click.echo(f" Timestamp: {v['timestamp']}")
|
|
click.echo("")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/violations",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
violations = response.json()
|
|
|
|
click.echo("⚠️ SLA Violations:")
|
|
click.echo("=" * 50)
|
|
for v in violations:
|
|
click.echo(f"👤 Miner: {v['miner_id']}")
|
|
click.echo(f" Type: {v['type']}")
|
|
click.echo(f" Threshold: {v['threshold']}")
|
|
click.echo(f" Actual: {v['actual']}")
|
|
click.echo(f" Timestamp: {v['timestamp']}")
|
|
click.echo("")
|
|
else:
|
|
click.echo(f"❌ Failed to get violations: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting violations: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def capacity_snapshots(test_mode):
|
|
"""Get capacity planning snapshots"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
mock_snapshots = [
|
|
{
|
|
"timestamp": "2024-03-15T00:00:00Z",
|
|
"total_capacity": 1250,
|
|
"available_capacity": 320,
|
|
"utilization": 74.4,
|
|
"active_miners": 42
|
|
},
|
|
{
|
|
"timestamp": "2024-03-14T00:00:00Z",
|
|
"total_capacity": 1200,
|
|
"available_capacity": 350,
|
|
"utilization": 70.8,
|
|
"active_miners": 40
|
|
}
|
|
]
|
|
|
|
click.echo("📊 Capacity Snapshots:")
|
|
click.echo("=" * 50)
|
|
for s in mock_snapshots:
|
|
click.echo(f"🕐 Timestamp: {s['timestamp']}")
|
|
click.echo(f" Total Capacity: {s['total_capacity']} GPU")
|
|
click.echo(f" Available: {s['available_capacity']} GPU")
|
|
click.echo(f" Utilization: {s['utilization']}%")
|
|
click.echo(f" Active Miners: {s['active_miners']}")
|
|
click.echo("")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/capacity/snapshots",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
snapshots = response.json()
|
|
|
|
click.echo("📊 Capacity Snapshots:")
|
|
click.echo("=" * 50)
|
|
for s in snapshots:
|
|
click.echo(f"🕐 Timestamp: {s['timestamp']}")
|
|
click.echo(f" Total Capacity: {s['total_capacity']} GPU")
|
|
click.echo(f" Available: {s['available_capacity']} GPU")
|
|
click.echo(f" Utilization: {s['utilization']}%")
|
|
click.echo(f" Active Miners: {s['active_miners']}")
|
|
click.echo("")
|
|
else:
|
|
click.echo(f"❌ Failed to get snapshots: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting snapshots: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def capacity_forecast(test_mode):
|
|
"""Get capacity forecast"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
mock_forecast = {
|
|
"forecast_days": 7,
|
|
"current_capacity": 1250,
|
|
"projected_capacity": 1400,
|
|
"growth_rate": 12.0,
|
|
"daily_projections": [
|
|
{"day": 1, "capacity": 1280},
|
|
{"day": 2, "capacity": 1310},
|
|
{"day": 3, "capacity": 1340},
|
|
{"day": 7, "capacity": 1400}
|
|
]
|
|
}
|
|
|
|
click.echo("🔮 Capacity Forecast:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"📅 Forecast Period: {mock_forecast['forecast_days']} days")
|
|
click.echo(f"📊 Current Capacity: {mock_forecast['current_capacity']} GPU")
|
|
click.echo(f"📈 Projected Capacity: {mock_forecast['projected_capacity']} GPU")
|
|
click.echo(f"📊 Growth Rate: {mock_forecast['growth_rate']}%")
|
|
click.echo("")
|
|
click.echo("Daily Projections:")
|
|
for p in mock_forecast['daily_projections']:
|
|
click.echo(f" Day {p['day']}: {p['capacity']} GPU")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/capacity/forecast",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
forecast = response.json()
|
|
|
|
click.echo("🔮 Capacity Forecast:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"📅 Forecast Period: {forecast['forecast_days']} days")
|
|
click.echo(f"📊 Current Capacity: {forecast['current_capacity']} GPU")
|
|
click.echo(f"📈 Projected Capacity: {forecast['projected_capacity']} GPU")
|
|
click.echo(f"📊 Growth Rate: {forecast['growth_rate']}%")
|
|
click.echo("")
|
|
click.echo("Daily Projections:")
|
|
for p in forecast['daily_projections']:
|
|
click.echo(f" Day {p['day']}: {p['capacity']} GPU")
|
|
else:
|
|
click.echo(f"❌ Failed to get forecast: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting forecast: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def capacity_recommendations(test_mode):
|
|
"""Get scaling recommendations"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
mock_recommendations = [
|
|
{
|
|
"type": "scale_up",
|
|
"reason": "High utilization (>80%)",
|
|
"action": "Add 50 GPU capacity",
|
|
"priority": "high"
|
|
},
|
|
{
|
|
"type": "optimize",
|
|
"reason": "Imbalanced workload distribution",
|
|
"action": "Rebalance miners across regions",
|
|
"priority": "medium"
|
|
}
|
|
]
|
|
|
|
click.echo("💡 Capacity Recommendations:")
|
|
click.echo("=" * 50)
|
|
for r in mock_recommendations:
|
|
click.echo(f"📌 Type: {r['type']}")
|
|
click.echo(f" Reason: {r['reason']}")
|
|
click.echo(f" Action: {r['action']}")
|
|
click.echo(f" Priority: {r['priority']}")
|
|
click.echo("")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/capacity/recommendations",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
recommendations = response.json()
|
|
|
|
click.echo("💡 Capacity Recommendations:")
|
|
click.echo("=" * 50)
|
|
for r in recommendations:
|
|
click.echo(f"📌 Type: {r['type']}")
|
|
click.echo(f" Reason: {r['reason']}")
|
|
click.echo(f" Action: {r['action']}")
|
|
click.echo(f" Priority: {r['priority']}")
|
|
click.echo("")
|
|
else:
|
|
click.echo(f"❌ Failed to get recommendations: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting recommendations: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def billing_usage(test_mode):
|
|
"""Get billing usage data"""
|
|
try:
|
|
if test_mode:
|
|
# Mock data for testing
|
|
mock_usage = {
|
|
"period_start": "2024-03-01T00:00:00Z",
|
|
"period_end": "2024-03-31T23:59:59Z",
|
|
"total_gpu_hours": 45678,
|
|
"total_api_calls": 1234567,
|
|
"total_compute_hours": 23456,
|
|
"total_cost": 12500.50,
|
|
"by_miner": [
|
|
{"miner_id": "miner_001", "gpu_hours": 12000, "cost": 3280.50},
|
|
{"miner_id": "miner_002", "gpu_hours": 8900, "cost": 2435.00}
|
|
]
|
|
}
|
|
|
|
click.echo("💰 Billing Usage:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"📅 Period: {mock_usage['period_start']} to {mock_usage['period_end']}")
|
|
click.echo(f"⚡ Total GPU Hours: {mock_usage['total_gpu_hours']}")
|
|
click.echo(f"📞 Total API Calls: {mock_usage['total_api_calls']}")
|
|
click.echo(f"🖥️ Total Compute Hours: {mock_usage['total_compute_hours']}")
|
|
click.echo(f"💵 Total Cost: ${mock_usage['total_cost']:.2f}")
|
|
click.echo("")
|
|
click.echo("By Miner:")
|
|
for m in mock_usage['by_miner']:
|
|
click.echo(f" {m['miner_id']}: {m['gpu_hours']} GPUh, ${m['cost']:.2f}")
|
|
|
|
return
|
|
|
|
# Fetch from pool-hub service
|
|
config = get_config()
|
|
response = requests.get(
|
|
f"{config.pool_hub_url}/sla/billing/usage",
|
|
timeout=30
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
usage = response.json()
|
|
|
|
click.echo("💰 Billing Usage:")
|
|
click.echo("=" * 50)
|
|
click.echo(f"📅 Period: {usage['period_start']} to {usage['period_end']}")
|
|
click.echo(f"⚡ Total GPU Hours: {usage['total_gpu_hours']}")
|
|
click.echo(f"📞 Total API Calls: {usage['total_api_calls']}")
|
|
click.echo(f"🖥️ Total Compute Hours: {usage['total_compute_hours']}")
|
|
click.echo(f"💵 Total Cost: ${usage['total_cost']:.2f}")
|
|
click.echo("")
|
|
click.echo("By Miner:")
|
|
for m in usage['by_miner']:
|
|
click.echo(f" {m['miner_id']}: {m['gpu_hours']} GPUh, ${m['cost']:.2f}")
|
|
else:
|
|
click.echo(f"❌ Failed to get billing usage: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error getting billing usage: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def billing_sync(test_mode):
|
|
"""Trigger billing sync with coordinator-api"""
|
|
try:
|
|
if test_mode:
|
|
click.echo("🔄 Billing sync triggered (test mode)")
|
|
click.echo("✅ Sync completed successfully")
|
|
return
|
|
|
|
# Trigger sync with pool-hub service
|
|
config = get_config()
|
|
response = requests.post(
|
|
f"{config.pool_hub_url}/sla/billing/sync",
|
|
timeout=60
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
result = response.json()
|
|
click.echo("🔄 Billing sync triggered")
|
|
click.echo(f"✅ Sync completed: {result.get('message', 'Success')}")
|
|
else:
|
|
click.echo(f"❌ Billing sync failed: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error triggering billing sync: {str(e)}", err=True)
|
|
|
|
@pool_hub.command()
|
|
@click.option('--test-mode', is_flag=True, help='Run in test mode')
|
|
def collect_metrics(test_mode):
|
|
"""Trigger SLA metrics collection"""
|
|
try:
|
|
if test_mode:
|
|
click.echo("📊 SLA metrics collection triggered (test mode)")
|
|
click.echo("✅ Collection completed successfully")
|
|
return
|
|
|
|
# Trigger collection with pool-hub service
|
|
config = get_config()
|
|
response = requests.post(
|
|
f"{config.pool_hub_url}/sla/metrics/collect",
|
|
timeout=60
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
result = response.json()
|
|
click.echo("📊 SLA metrics collection triggered")
|
|
click.echo(f"✅ Collection completed: {result.get('message', 'Success')}")
|
|
else:
|
|
click.echo(f"❌ Metrics collection failed: {response.text}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"❌ Error triggering metrics collection: {str(e)}", err=True)
|
|
|
|
# Helper function to get config
|
|
def get_config():
|
|
"""Get CLI configuration"""
|
|
try:
|
|
from config import get_config
|
|
return get_config()
|
|
except ImportError:
|
|
# Fallback for testing
|
|
from types import SimpleNamespace
|
|
return SimpleNamespace(
|
|
pool_hub_url="http://localhost:8012",
|
|
api_key="test-api-key"
|
|
)
|
|
|
|
if __name__ == "__main__":
|
|
pool_hub()
|