""" Pool Hub CLI Commands for AITBC Commands for SLA monitoring, capacity planning, and billing integration """ import click import json import requests from datetime import datetime from typing import Dict, Any, List, Optional @click.group() def pool_hub(): """Pool hub management commands for SLA monitoring and billing""" pass @pool_hub.command() @click.argument('miner_id', required=False) @click.option('--test-mode', is_flag=True, help='Run in test mode') def sla_metrics(miner_id, test_mode): """Get SLA metrics for a miner or all miners""" try: if test_mode: # Mock data for testing if miner_id: mock_metrics = { "miner_id": miner_id, "uptime_percentage": 97.5, "response_time_ms": 850, "job_completion_rate": 92.3, "capacity_availability": 85.0, "thresholds": { "uptime": 95.0, "response_time": 1000, "completion_rate": 90.0, "capacity": 80.0 }, "violations": [ { "type": "response_time", "threshold": 1000, "actual": 1200, "timestamp": "2024-03-15T14:30:00Z" } ] } click.echo(f"📊 SLA Metrics for {miner_id}:") click.echo("=" * 50) click.echo(f"⏱️ Uptime: {mock_metrics['uptime_percentage']}% (threshold: {mock_metrics['thresholds']['uptime']}%)") click.echo(f"⚡ Response Time: {mock_metrics['response_time_ms']}ms (threshold: {mock_metrics['thresholds']['response_time']}ms)") click.echo(f"✅ Job Completion Rate: {mock_metrics['job_completion_rate']}% (threshold: {mock_metrics['thresholds']['completion_rate']}%)") click.echo(f"📦 Capacity Availability: {mock_metrics['capacity_availability']}% (threshold: {mock_metrics['thresholds']['capacity']}%)") if mock_metrics['violations']: click.echo("") click.echo("⚠️ Violations:") for v in mock_metrics['violations']: click.echo(f" {v['type']}: {v['actual']} vs threshold {v['threshold']} at {v['timestamp']}") else: mock_metrics = { "total_miners": 45, "average_uptime": 96.2, "average_response_time": 780, "average_completion_rate": 94.1, "average_capacity": 88.5, "miners_below_threshold": 3 } click.echo("📊 SLA Metrics (All Miners):") click.echo("=" * 50) click.echo(f"👥 Total Miners: {mock_metrics['total_miners']}") click.echo(f"⏱️ Average Uptime: {mock_metrics['average_uptime']}%") click.echo(f"⚡ Average Response Time: {mock_metrics['average_response_time']}ms") click.echo(f"✅ Average Completion Rate: {mock_metrics['average_completion_rate']}%") click.echo(f"📦 Average Capacity: {mock_metrics['average_capacity']}%") click.echo(f"⚠️ Miners Below Threshold: {mock_metrics['miners_below_threshold']}") return # Fetch from pool-hub service config = get_config() if miner_id: response = requests.get( f"{config.pool_hub_url}/sla/metrics/{miner_id}", timeout=30 ) else: response = requests.get( f"{config.pool_hub_url}/sla/metrics", timeout=30 ) if response.status_code == 200: metrics = response.json() if miner_id: click.echo(f"📊 SLA Metrics for {miner_id}:") click.echo("=" * 50) click.echo(f"⏱️ Uptime: {metrics.get('uptime_percentage', 0)}%") click.echo(f"⚡ Response Time: {metrics.get('response_time_ms', 0)}ms") click.echo(f"✅ Job Completion Rate: {metrics.get('job_completion_rate', 0)}%") click.echo(f"📦 Capacity Availability: {metrics.get('capacity_availability', 0)}%") else: click.echo("📊 SLA Metrics (All Miners):") click.echo("=" * 50) click.echo(f"👥 Total Miners: {metrics.get('total_miners', 0)}") click.echo(f"⏱️ Average Uptime: {metrics.get('average_uptime', 0)}%") click.echo(f"⚡ Average Response Time: {metrics.get('average_response_time', 0)}ms") click.echo(f"✅ Average Completion Rate: {metrics.get('average_completion_rate', 0)}%") else: click.echo(f"❌ Failed to get SLA metrics: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting SLA metrics: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def sla_violations(test_mode): """Get SLA violations across all miners""" try: if test_mode: # Mock data for testing mock_violations = [ { "miner_id": "miner_001", "type": "response_time", "threshold": 1000, "actual": 1200, "timestamp": "2024-03-15T14:30:00Z" }, { "miner_id": "miner_002", "type": "uptime", "threshold": 95.0, "actual": 92.5, "timestamp": "2024-03-15T13:45:00Z" } ] click.echo("⚠️ SLA Violations:") click.echo("=" * 50) for v in mock_violations: click.echo(f"👤 Miner: {v['miner_id']}") click.echo(f" Type: {v['type']}") click.echo(f" Threshold: {v['threshold']}") click.echo(f" Actual: {v['actual']}") click.echo(f" Timestamp: {v['timestamp']}") click.echo("") return # Fetch from pool-hub service config = get_config() response = requests.get( f"{config.pool_hub_url}/sla/violations", timeout=30 ) if response.status_code == 200: violations = response.json() click.echo("⚠️ SLA Violations:") click.echo("=" * 50) for v in violations: click.echo(f"👤 Miner: {v['miner_id']}") click.echo(f" Type: {v['type']}") click.echo(f" Threshold: {v['threshold']}") click.echo(f" Actual: {v['actual']}") click.echo(f" Timestamp: {v['timestamp']}") click.echo("") else: click.echo(f"❌ Failed to get violations: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting violations: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def capacity_snapshots(test_mode): """Get capacity planning snapshots""" try: if test_mode: # Mock data for testing mock_snapshots = [ { "timestamp": "2024-03-15T00:00:00Z", "total_capacity": 1250, "available_capacity": 320, "utilization": 74.4, "active_miners": 42 }, { "timestamp": "2024-03-14T00:00:00Z", "total_capacity": 1200, "available_capacity": 350, "utilization": 70.8, "active_miners": 40 } ] click.echo("📊 Capacity Snapshots:") click.echo("=" * 50) for s in mock_snapshots: click.echo(f"🕐 Timestamp: {s['timestamp']}") click.echo(f" Total Capacity: {s['total_capacity']} GPU") click.echo(f" Available: {s['available_capacity']} GPU") click.echo(f" Utilization: {s['utilization']}%") click.echo(f" Active Miners: {s['active_miners']}") click.echo("") return # Fetch from pool-hub service config = get_config() response = requests.get( f"{config.pool_hub_url}/sla/capacity/snapshots", timeout=30 ) if response.status_code == 200: snapshots = response.json() click.echo("📊 Capacity Snapshots:") click.echo("=" * 50) for s in snapshots: click.echo(f"🕐 Timestamp: {s['timestamp']}") click.echo(f" Total Capacity: {s['total_capacity']} GPU") click.echo(f" Available: {s['available_capacity']} GPU") click.echo(f" Utilization: {s['utilization']}%") click.echo(f" Active Miners: {s['active_miners']}") click.echo("") else: click.echo(f"❌ Failed to get snapshots: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting snapshots: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def capacity_forecast(test_mode): """Get capacity forecast""" try: if test_mode: # Mock data for testing mock_forecast = { "forecast_days": 7, "current_capacity": 1250, "projected_capacity": 1400, "growth_rate": 12.0, "daily_projections": [ {"day": 1, "capacity": 1280}, {"day": 2, "capacity": 1310}, {"day": 3, "capacity": 1340}, {"day": 7, "capacity": 1400} ] } click.echo("🔮 Capacity Forecast:") click.echo("=" * 50) click.echo(f"📅 Forecast Period: {mock_forecast['forecast_days']} days") click.echo(f"📊 Current Capacity: {mock_forecast['current_capacity']} GPU") click.echo(f"📈 Projected Capacity: {mock_forecast['projected_capacity']} GPU") click.echo(f"📊 Growth Rate: {mock_forecast['growth_rate']}%") click.echo("") click.echo("Daily Projections:") for p in mock_forecast['daily_projections']: click.echo(f" Day {p['day']}: {p['capacity']} GPU") return # Fetch from pool-hub service config = get_config() response = requests.get( f"{config.pool_hub_url}/sla/capacity/forecast", timeout=30 ) if response.status_code == 200: forecast = response.json() click.echo("🔮 Capacity Forecast:") click.echo("=" * 50) click.echo(f"📅 Forecast Period: {forecast['forecast_days']} days") click.echo(f"📊 Current Capacity: {forecast['current_capacity']} GPU") click.echo(f"📈 Projected Capacity: {forecast['projected_capacity']} GPU") click.echo(f"📊 Growth Rate: {forecast['growth_rate']}%") click.echo("") click.echo("Daily Projections:") for p in forecast['daily_projections']: click.echo(f" Day {p['day']}: {p['capacity']} GPU") else: click.echo(f"❌ Failed to get forecast: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting forecast: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def capacity_recommendations(test_mode): """Get scaling recommendations""" try: if test_mode: # Mock data for testing mock_recommendations = [ { "type": "scale_up", "reason": "High utilization (>80%)", "action": "Add 50 GPU capacity", "priority": "high" }, { "type": "optimize", "reason": "Imbalanced workload distribution", "action": "Rebalance miners across regions", "priority": "medium" } ] click.echo("💡 Capacity Recommendations:") click.echo("=" * 50) for r in mock_recommendations: click.echo(f"📌 Type: {r['type']}") click.echo(f" Reason: {r['reason']}") click.echo(f" Action: {r['action']}") click.echo(f" Priority: {r['priority']}") click.echo("") return # Fetch from pool-hub service config = get_config() response = requests.get( f"{config.pool_hub_url}/sla/capacity/recommendations", timeout=30 ) if response.status_code == 200: recommendations = response.json() click.echo("💡 Capacity Recommendations:") click.echo("=" * 50) for r in recommendations: click.echo(f"📌 Type: {r['type']}") click.echo(f" Reason: {r['reason']}") click.echo(f" Action: {r['action']}") click.echo(f" Priority: {r['priority']}") click.echo("") else: click.echo(f"❌ Failed to get recommendations: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting recommendations: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def billing_usage(test_mode): """Get billing usage data""" try: if test_mode: # Mock data for testing mock_usage = { "period_start": "2024-03-01T00:00:00Z", "period_end": "2024-03-31T23:59:59Z", "total_gpu_hours": 45678, "total_api_calls": 1234567, "total_compute_hours": 23456, "total_cost": 12500.50, "by_miner": [ {"miner_id": "miner_001", "gpu_hours": 12000, "cost": 3280.50}, {"miner_id": "miner_002", "gpu_hours": 8900, "cost": 2435.00} ] } click.echo("💰 Billing Usage:") click.echo("=" * 50) click.echo(f"📅 Period: {mock_usage['period_start']} to {mock_usage['period_end']}") click.echo(f"⚡ Total GPU Hours: {mock_usage['total_gpu_hours']}") click.echo(f"📞 Total API Calls: {mock_usage['total_api_calls']}") click.echo(f"🖥️ Total Compute Hours: {mock_usage['total_compute_hours']}") click.echo(f"💵 Total Cost: ${mock_usage['total_cost']:.2f}") click.echo("") click.echo("By Miner:") for m in mock_usage['by_miner']: click.echo(f" {m['miner_id']}: {m['gpu_hours']} GPUh, ${m['cost']:.2f}") return # Fetch from pool-hub service config = get_config() response = requests.get( f"{config.pool_hub_url}/sla/billing/usage", timeout=30 ) if response.status_code == 200: usage = response.json() click.echo("💰 Billing Usage:") click.echo("=" * 50) click.echo(f"📅 Period: {usage['period_start']} to {usage['period_end']}") click.echo(f"⚡ Total GPU Hours: {usage['total_gpu_hours']}") click.echo(f"📞 Total API Calls: {usage['total_api_calls']}") click.echo(f"🖥️ Total Compute Hours: {usage['total_compute_hours']}") click.echo(f"💵 Total Cost: ${usage['total_cost']:.2f}") click.echo("") click.echo("By Miner:") for m in usage['by_miner']: click.echo(f" {m['miner_id']}: {m['gpu_hours']} GPUh, ${m['cost']:.2f}") else: click.echo(f"❌ Failed to get billing usage: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error getting billing usage: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def billing_sync(test_mode): """Trigger billing sync with coordinator-api""" try: if test_mode: click.echo("🔄 Billing sync triggered (test mode)") click.echo("✅ Sync completed successfully") return # Trigger sync with pool-hub service config = get_config() response = requests.post( f"{config.pool_hub_url}/sla/billing/sync", timeout=60 ) if response.status_code == 200: result = response.json() click.echo("🔄 Billing sync triggered") click.echo(f"✅ Sync completed: {result.get('message', 'Success')}") else: click.echo(f"❌ Billing sync failed: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error triggering billing sync: {str(e)}", err=True) @pool_hub.command() @click.option('--test-mode', is_flag=True, help='Run in test mode') def collect_metrics(test_mode): """Trigger SLA metrics collection""" try: if test_mode: click.echo("📊 SLA metrics collection triggered (test mode)") click.echo("✅ Collection completed successfully") return # Trigger collection with pool-hub service config = get_config() response = requests.post( f"{config.pool_hub_url}/sla/metrics/collect", timeout=60 ) if response.status_code == 200: result = response.json() click.echo("📊 SLA metrics collection triggered") click.echo(f"✅ Collection completed: {result.get('message', 'Success')}") else: click.echo(f"❌ Metrics collection failed: {response.text}", err=True) except Exception as e: click.echo(f"❌ Error triggering metrics collection: {str(e)}", err=True) # Helper function to get config def get_config(): """Get CLI configuration""" try: from config import get_config return get_config() except ImportError: # Fallback for testing from types import SimpleNamespace return SimpleNamespace( pool_hub_url="http://localhost:8012", api_key="test-api-key" ) if __name__ == "__main__": pool_hub()