Files
aitbc/cli/commands/pool_hub.py
aitbc 522655ef92
Some checks failed
API Endpoint Tests / test-api-endpoints (push) Successful in 10s
Blockchain Synchronization Verification / sync-verification (push) Failing after 3s
CLI Tests / test-cli (push) Failing after 4s
Documentation Validation / validate-docs (push) Successful in 8s
Documentation Validation / validate-policies-strict (push) Successful in 4s
Integration Tests / test-service-integration (push) Successful in 38s
Multi-Node Blockchain Health Monitoring / health-check (push) Successful in 2s
P2P Network Verification / p2p-verification (push) Successful in 3s
Security Scanning / security-scan (push) Successful in 40s
Smart Contract Tests / test-solidity (map[name:aitbc-token path:packages/solidity/aitbc-token]) (push) Successful in 15s
Smart Contract Tests / lint-solidity (push) Successful in 8s
Move blockchain app READMEs to centralized documentation
- Relocate blockchain-event-bridge README content to docs/apps/blockchain/blockchain-event-bridge.md
- Relocate blockchain-explorer README content to docs/apps/blockchain/blockchain-explorer.md
- Replace app READMEs with redirect notices pointing to new documentation location
- Consolidate documentation in central docs/ directory for better organization
2026-04-23 12:24:48 +02:00

487 lines
19 KiB
Python

"""
Pool Hub CLI Commands for AITBC
Commands for SLA monitoring, capacity planning, and billing integration
"""
import click
import json
import requests
from datetime import datetime
from typing import Dict, Any, List, Optional
@click.group()
def pool_hub():
    """Pool hub management commands for SLA monitoring and billing"""
    # Pure container group: it performs no work of its own. The sub-commands in
    # this module attach themselves through @pool_hub.command().
    # The docstring is kept verbatim because click displays it as help text.
@pool_hub.command()
@click.argument('miner_id', required=False)
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def sla_metrics(miner_id, test_mode):
    """Get SLA metrics for a miner or all miners"""
    # NOTE: the docstring above is intentionally left unchanged; click uses it
    # as the command's help text.
    #
    # miner_id:  optional miner identifier. When omitted (or empty) the
    #            aggregate "all miners" view is printed instead.
    # test_mode: print built-in sample figures and never contact the service.
    #
    # Nothing is raised to the caller: a non-200 reply or any exception
    # (config lookup, network error, malformed JSON) is reported on stderr.

    # Imported locally so this block does not rely on a file-level import.
    from urllib.parse import quote

    try:
        if test_mode:
            # Canned figures for testing; no network access happens here.
            if miner_id:
                sample = {
                    "miner_id": miner_id,
                    "uptime_percentage": 97.5,
                    "response_time_ms": 850,
                    "job_completion_rate": 92.3,
                    "capacity_availability": 85.0,
                    "thresholds": {
                        "uptime": 95.0,
                        "response_time": 1000,
                        "completion_rate": 90.0,
                        "capacity": 80.0,
                    },
                    "violations": [
                        {
                            "type": "response_time",
                            "threshold": 1000,
                            "actual": 1200,
                            "timestamp": "2024-03-15T14:30:00Z",
                        }
                    ],
                }
                limits = sample["thresholds"]
                click.echo(f"📊 SLA Metrics for {miner_id}:")
                click.echo("=" * 50)
                click.echo(f"⏱️ Uptime: {sample['uptime_percentage']}% (threshold: {limits['uptime']}%)")
                click.echo(f"⚡ Response Time: {sample['response_time_ms']}ms (threshold: {limits['response_time']}ms)")
                click.echo(f"✅ Job Completion Rate: {sample['job_completion_rate']}% (threshold: {limits['completion_rate']}%)")
                click.echo(f"📦 Capacity Availability: {sample['capacity_availability']}% (threshold: {limits['capacity']}%)")
                if sample["violations"]:
                    click.echo("")
                    click.echo("⚠️ Violations:")
                    for breach in sample["violations"]:
                        click.echo(f" {breach['type']}: {breach['actual']} vs threshold {breach['threshold']} at {breach['timestamp']}")
            else:
                sample = {
                    "total_miners": 45,
                    "average_uptime": 96.2,
                    "average_response_time": 780,
                    "average_completion_rate": 94.1,
                    "average_capacity": 88.5,
                    "miners_below_threshold": 3,
                }
                click.echo("📊 SLA Metrics (All Miners):")
                click.echo("=" * 50)
                click.echo(f"👥 Total Miners: {sample['total_miners']}")
                click.echo(f"⏱️ Average Uptime: {sample['average_uptime']}%")
                click.echo(f"⚡ Average Response Time: {sample['average_response_time']}ms")
                click.echo(f"✅ Average Completion Rate: {sample['average_completion_rate']}%")
                click.echo(f"📦 Average Capacity: {sample['average_capacity']}%")
                click.echo(f"⚠️ Miners Below Threshold: {sample['miners_below_threshold']}")
            return

        # Fetch from pool-hub service
        config = get_config()
        endpoint = f"{config.pool_hub_url}/sla/metrics"
        if miner_id:
            # Percent-encode the identifier so characters such as "/", "?" or
            # "#" stay inside this single path segment instead of silently
            # redirecting the request to a different endpoint.
            encoded_id = quote(miner_id, safe="")
            endpoint = f"{endpoint}/{encoded_id}"
        response = requests.get(endpoint, timeout=30)

        if response.status_code != 200:
            click.echo(f"❌ Failed to get SLA metrics: {response.text}", err=True)
            return

        metrics = response.json()
        if miner_id:
            # The header shows the identifier as the user typed it, not the
            # encoded form used in the URL.
            click.echo(f"📊 SLA Metrics for {miner_id}:")
            click.echo("=" * 50)
            click.echo(f"⏱️ Uptime: {metrics.get('uptime_percentage', 0)}%")
            click.echo(f"⚡ Response Time: {metrics.get('response_time_ms', 0)}ms")
            click.echo(f"✅ Job Completion Rate: {metrics.get('job_completion_rate', 0)}%")
            click.echo(f"📦 Capacity Availability: {metrics.get('capacity_availability', 0)}%")
        else:
            click.echo("📊 SLA Metrics (All Miners):")
            click.echo("=" * 50)
            click.echo(f"👥 Total Miners: {metrics.get('total_miners', 0)}")
            click.echo(f"⏱️ Average Uptime: {metrics.get('average_uptime', 0)}%")
            click.echo(f"⚡ Average Response Time: {metrics.get('average_response_time', 0)}ms")
            click.echo(f"✅ Average Completion Rate: {metrics.get('average_completion_rate', 0)}%")
    except Exception as e:
        # Top-level CLI boundary: report the problem instead of a traceback.
        click.echo(f"❌ Error getting SLA metrics: {str(e)}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def sla_violations(test_mode):
    """Get SLA violations across all miners"""
    # test_mode: when set, two canned violation records are rendered and the
    # pool-hub service is never contacted.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            # Mock data for testing
            records = [
                {
                    "miner_id": "miner_001",
                    "type": "response_time",
                    "threshold": 1000,
                    "actual": 1200,
                    "timestamp": "2024-03-15T14:30:00Z",
                },
                {
                    "miner_id": "miner_002",
                    "type": "uptime",
                    "threshold": 95.0,
                    "actual": 92.5,
                    "timestamp": "2024-03-15T13:45:00Z",
                },
            ]
        else:
            # Fetch from pool-hub service
            settings = get_config()
            reply = requests.get(
                f"{settings.pool_hub_url}/sla/violations",
                timeout=30,
            )
            if reply.status_code != 200:
                click.echo(f"❌ Failed to get violations: {reply.text}", err=True)
                return
            records = reply.json()

        # Mock and live records are rendered by the same code.
        click.echo("⚠️ SLA Violations:")
        click.echo("=" * 50)
        detail_fields = (
            ("Type", "type"),
            ("Threshold", "threshold"),
            ("Actual", "actual"),
            ("Timestamp", "timestamp"),
        )
        for record in records:
            click.echo(f"👤 Miner: {record['miner_id']}")
            for label, key in detail_fields:
                click.echo(f" {label}: {record[key]}")
            click.echo("")
    except Exception as exc:
        click.echo(f"❌ Error getting violations: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def capacity_snapshots(test_mode):
    """Get capacity planning snapshots"""
    # test_mode: when set, two canned snapshots are rendered and the pool-hub
    # service is never contacted.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            # Mock data for testing
            entries = [
                {
                    "timestamp": "2024-03-15T00:00:00Z",
                    "total_capacity": 1250,
                    "available_capacity": 320,
                    "utilization": 74.4,
                    "active_miners": 42,
                },
                {
                    "timestamp": "2024-03-14T00:00:00Z",
                    "total_capacity": 1200,
                    "available_capacity": 350,
                    "utilization": 70.8,
                    "active_miners": 40,
                },
            ]
        else:
            # Fetch from pool-hub service
            settings = get_config()
            reply = requests.get(
                f"{settings.pool_hub_url}/sla/capacity/snapshots",
                timeout=30,
            )
            if reply.status_code != 200:
                click.echo(f"❌ Failed to get snapshots: {reply.text}", err=True)
                return
            entries = reply.json()

        # Mock and live snapshots are rendered by the same code.
        click.echo("📊 Capacity Snapshots:")
        click.echo("=" * 50)
        for entry in entries:
            click.echo(f"🕐 Timestamp: {entry['timestamp']}")
            click.echo(f" Total Capacity: {entry['total_capacity']} GPU")
            click.echo(f" Available: {entry['available_capacity']} GPU")
            click.echo(f" Utilization: {entry['utilization']}%")
            click.echo(f" Active Miners: {entry['active_miners']}")
            click.echo("")
    except Exception as exc:
        click.echo(f"❌ Error getting snapshots: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def capacity_forecast(test_mode):
    """Get capacity forecast"""
    # test_mode: when set, a canned 7-day forecast is rendered and the pool-hub
    # service is never contacted.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            # Mock data for testing
            outlook = {
                "forecast_days": 7,
                "current_capacity": 1250,
                "projected_capacity": 1400,
                "growth_rate": 12.0,
                "daily_projections": [
                    {"day": 1, "capacity": 1280},
                    {"day": 2, "capacity": 1310},
                    {"day": 3, "capacity": 1340},
                    {"day": 7, "capacity": 1400},
                ],
            }
        else:
            # Fetch from pool-hub service
            settings = get_config()
            reply = requests.get(
                f"{settings.pool_hub_url}/sla/capacity/forecast",
                timeout=30,
            )
            if reply.status_code != 200:
                click.echo(f"❌ Failed to get forecast: {reply.text}", err=True)
                return
            outlook = reply.json()

        # Mock and live forecasts are rendered by the same code.
        click.echo("🔮 Capacity Forecast:")
        click.echo("=" * 50)
        click.echo(f"📅 Forecast Period: {outlook['forecast_days']} days")
        click.echo(f"📊 Current Capacity: {outlook['current_capacity']} GPU")
        click.echo(f"📈 Projected Capacity: {outlook['projected_capacity']} GPU")
        click.echo(f"📊 Growth Rate: {outlook['growth_rate']}%")
        click.echo("")
        click.echo("Daily Projections:")
        for point in outlook['daily_projections']:
            click.echo(f" Day {point['day']}: {point['capacity']} GPU")
    except Exception as exc:
        click.echo(f"❌ Error getting forecast: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def capacity_recommendations(test_mode):
    """Get scaling recommendations"""
    # test_mode: when set, two canned recommendations are rendered and the
    # pool-hub service is never contacted.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            # Mock data for testing
            suggestions = [
                {
                    "type": "scale_up",
                    "reason": "High utilization (>80%)",
                    "action": "Add 50 GPU capacity",
                    "priority": "high",
                },
                {
                    "type": "optimize",
                    "reason": "Imbalanced workload distribution",
                    "action": "Rebalance miners across regions",
                    "priority": "medium",
                },
            ]
        else:
            # Fetch from pool-hub service
            settings = get_config()
            reply = requests.get(
                f"{settings.pool_hub_url}/sla/capacity/recommendations",
                timeout=30,
            )
            if reply.status_code != 200:
                click.echo(f"❌ Failed to get recommendations: {reply.text}", err=True)
                return
            suggestions = reply.json()

        # Mock and live recommendations are rendered by the same code.
        click.echo("💡 Capacity Recommendations:")
        click.echo("=" * 50)
        detail_fields = (
            ("Reason", "reason"),
            ("Action", "action"),
            ("Priority", "priority"),
        )
        for item in suggestions:
            click.echo(f"📌 Type: {item['type']}")
            for label, key in detail_fields:
                click.echo(f" {label}: {item[key]}")
            click.echo("")
    except Exception as exc:
        click.echo(f"❌ Error getting recommendations: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def billing_usage(test_mode):
    """Get billing usage data"""
    # test_mode: when set, a canned usage report is rendered and the pool-hub
    # service is never contacted.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            # Mock data for testing
            report = {
                "period_start": "2024-03-01T00:00:00Z",
                "period_end": "2024-03-31T23:59:59Z",
                "total_gpu_hours": 45678,
                "total_api_calls": 1234567,
                "total_compute_hours": 23456,
                "total_cost": 12500.50,
                "by_miner": [
                    {"miner_id": "miner_001", "gpu_hours": 12000, "cost": 3280.50},
                    {"miner_id": "miner_002", "gpu_hours": 8900, "cost": 2435.00},
                ],
            }
        else:
            # Fetch from pool-hub service
            settings = get_config()
            reply = requests.get(
                f"{settings.pool_hub_url}/sla/billing/usage",
                timeout=30,
            )
            if reply.status_code != 200:
                click.echo(f"❌ Failed to get billing usage: {reply.text}", err=True)
                return
            report = reply.json()

        # Mock and live reports are rendered by the same code; monetary
        # amounts are always shown with two decimals.
        click.echo("💰 Billing Usage:")
        click.echo("=" * 50)
        click.echo(f"📅 Period: {report['period_start']} to {report['period_end']}")
        click.echo(f"⚡ Total GPU Hours: {report['total_gpu_hours']}")
        click.echo(f"📞 Total API Calls: {report['total_api_calls']}")
        click.echo(f"🖥️ Total Compute Hours: {report['total_compute_hours']}")
        click.echo(f"💵 Total Cost: ${report['total_cost']:.2f}")
        click.echo("")
        click.echo("By Miner:")
        for row in report['by_miner']:
            click.echo(f" {row['miner_id']}: {row['gpu_hours']} GPUh, ${row['cost']:.2f}")
    except Exception as exc:
        click.echo(f"❌ Error getting billing usage: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def billing_sync(test_mode):
    """Trigger billing sync with coordinator-api"""
    # test_mode: when set, only the success messages are printed and no
    # request is sent.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            click.echo("🔄 Billing sync triggered (test mode)")
            click.echo("✅ Sync completed successfully")
            return

        # Trigger sync with pool-hub service
        settings = get_config()
        reply = requests.post(
            f"{settings.pool_hub_url}/sla/billing/sync",
            timeout=60,
        )
        if reply.status_code != 200:
            click.echo(f"❌ Billing sync failed: {reply.text}", err=True)
            return

        # Decode the body before printing anything so a malformed reply does
        # not leave a half-written success message.
        outcome = reply.json()
        click.echo("🔄 Billing sync triggered")
        click.echo(f"✅ Sync completed: {outcome.get('message', 'Success')}")
    except Exception as exc:
        click.echo(f"❌ Error triggering billing sync: {exc}", err=True)
@pool_hub.command()
@click.option('--test-mode', is_flag=True, help='Run in test mode')
def collect_metrics(test_mode):
    """Trigger SLA metrics collection"""
    # test_mode: when set, only the success messages are printed and no
    # request is sent.
    # A non-200 reply or any exception is reported on stderr; nothing is raised.
    try:
        if test_mode:
            click.echo("📊 SLA metrics collection triggered (test mode)")
            click.echo("✅ Collection completed successfully")
            return

        # Trigger collection with pool-hub service
        settings = get_config()
        reply = requests.post(
            f"{settings.pool_hub_url}/sla/metrics/collect",
            timeout=60,
        )
        if reply.status_code != 200:
            click.echo(f"❌ Metrics collection failed: {reply.text}", err=True)
            return

        # Decode the body before printing anything so a malformed reply does
        # not leave a half-written success message.
        outcome = reply.json()
        click.echo("📊 SLA metrics collection triggered")
        click.echo(f"✅ Collection completed: {outcome.get('message', 'Success')}")
    except Exception as exc:
        click.echo(f"❌ Error triggering metrics collection: {exc}", err=True)
# Helper function to get config
def get_config():
    """Get CLI configuration.

    Returns whatever ``config.get_config()`` returns when the ``config``
    module can be imported. If that import (or the call itself) raises
    ImportError, a stand-in object with ``pool_hub_url`` and ``api_key``
    attributes is returned instead.
    """
    try:
        # Aliased so the imported loader does not shadow this function's name.
        from config import get_config as load_cli_config
        return load_cli_config()
    except ImportError:
        # Fallback for testing
        from types import SimpleNamespace

        fallback = SimpleNamespace(
            pool_hub_url="http://localhost:8012",
            api_key="test-api-key",
        )
        return fallback
if __name__ == "__main__":
    # Run the command group only when this file is executed as a script.
    pool_hub()