Move blockchain app READMEs to centralized documentation

- Relocate blockchain-event-bridge README content to docs/apps/blockchain/blockchain-event-bridge.md
- Relocate blockchain-explorer README content to docs/apps/blockchain/blockchain-explorer.md
- Replace app READMEs with redirect notices pointing to new documentation location
- Consolidate documentation in central docs/ directory for better organization
2026-04-23 12:24:48 +02:00
parent cd240485c6
commit 522655ef92
55 changed files with 7033 additions and 1536 deletions
--- a/cli/commands/pool_hub.py
+++ b/cli/commands/pool_hub.py
@@ -0,0 +1,486 @@
+"""
+Pool Hub CLI Commands for AITBC
+Commands for SLA monitoring, capacity planning, and billing integration
+"""
+
+import click
+import json
+import requests
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+# Root click group: every subcommand in this module registers itself on it
+# through @pool_hub.command(). The docstring doubles as the group's help text.
+@click.group()
+def pool_hub():
+    """Pool hub management commands for SLA monitoring and billing"""
+
+@pool_hub.command()
+@click.argument('miner_id', required=False)
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def sla_metrics(miner_id, test_mode):
+    """Get SLA metrics for a miner or all miners"""
+    # miner_id:  optional positional argument; when it is empty the aggregate
+    #            view across all miners is printed instead.
+    # test_mode: True when --test-mode is given; prints canned sample data and
+    #            returns before any network call is made.
+    # Every exception is caught and reported on stderr rather than raised.
+    # NOTE(review): failures still exit with status 0 - confirm whether
+    # scripts calling this CLI need a non-zero exit code.
+    from urllib.parse import quote  # local import keeps the block self-contained
+
+    try:
+        if test_mode:
+            # Mock data for testing
+            if miner_id:
+                mock_metrics = {
+                    "miner_id": miner_id,
+                    "uptime_percentage": 97.5,
+                    "response_time_ms": 850,
+                    "job_completion_rate": 92.3,
+                    "capacity_availability": 85.0,
+                    "thresholds": {
+                        "uptime": 95.0,
+                        "response_time": 1000,
+                        "completion_rate": 90.0,
+                        "capacity": 80.0
+                    },
+                    "violations": [
+                        {
+                            "type": "response_time",
+                            "threshold": 1000,
+                            "actual": 1200,
+                            "timestamp": "2024-03-15T14:30:00Z"
+                        }
+                    ]
+                }
+
+                click.echo(f"📊 SLA Metrics for {miner_id}:")
+                click.echo("=" * 50)
+                click.echo(f"⏱️  Uptime: {mock_metrics['uptime_percentage']}% (threshold: {mock_metrics['thresholds']['uptime']}%)")
+                click.echo(f"⚡ Response Time: {mock_metrics['response_time_ms']}ms (threshold: {mock_metrics['thresholds']['response_time']}ms)")
+                click.echo(f"✅ Job Completion Rate: {mock_metrics['job_completion_rate']}% (threshold: {mock_metrics['thresholds']['completion_rate']}%)")
+                click.echo(f"📦 Capacity Availability: {mock_metrics['capacity_availability']}% (threshold: {mock_metrics['thresholds']['capacity']}%)")
+
+                if mock_metrics['violations']:
+                    click.echo("")
+                    click.echo("⚠️  Violations:")
+                    for v in mock_metrics['violations']:
+                        click.echo(f"   {v['type']}: {v['actual']} vs threshold {v['threshold']} at {v['timestamp']}")
+            else:
+                mock_metrics = {
+                    "total_miners": 45,
+                    "average_uptime": 96.2,
+                    "average_response_time": 780,
+                    "average_completion_rate": 94.1,
+                    "average_capacity": 88.5,
+                    "miners_below_threshold": 3
+                }
+
+                click.echo("📊 SLA Metrics (All Miners):")
+                click.echo("=" * 50)
+                click.echo(f"👥 Total Miners: {mock_metrics['total_miners']}")
+                click.echo(f"⏱️  Average Uptime: {mock_metrics['average_uptime']}%")
+                click.echo(f"⚡ Average Response Time: {mock_metrics['average_response_time']}ms")
+                click.echo(f"✅ Average Completion Rate: {mock_metrics['average_completion_rate']}%")
+                click.echo(f"📦 Average Capacity: {mock_metrics['average_capacity']}%")
+                click.echo(f"⚠️  Miners Below Threshold: {mock_metrics['miners_below_threshold']}")
+
+            return
+
+        # Fetch from pool-hub service
+        config = get_config()
+        url = f"{config.pool_hub_url}/sla/metrics"
+        if miner_id:
+            # miner_id is user input: percent-encode it (including "/") so it
+            # stays one path segment and cannot address a different endpoint.
+            url = f"{url}/{quote(miner_id, safe='')}"
+        response = requests.get(url, timeout=30)
+
+        if response.status_code != 200:
+            click.echo(f"❌ Failed to get SLA metrics: {response.text}", err=True)
+            return
+
+        metrics = response.json()
+
+        if miner_id:
+            click.echo(f"📊 SLA Metrics for {miner_id}:")
+            click.echo("=" * 50)
+            click.echo(f"⏱️  Uptime: {metrics.get('uptime_percentage', 0)}%")
+            click.echo(f"⚡ Response Time: {metrics.get('response_time_ms', 0)}ms")
+            click.echo(f"✅ Job Completion Rate: {metrics.get('job_completion_rate', 0)}%")
+            click.echo(f"📦 Capacity Availability: {metrics.get('capacity_availability', 0)}%")
+        else:
+            click.echo("📊 SLA Metrics (All Miners):")
+            click.echo("=" * 50)
+            click.echo(f"👥 Total Miners: {metrics.get('total_miners', 0)}")
+            click.echo(f"⏱️  Average Uptime: {metrics.get('average_uptime', 0)}%")
+            click.echo(f"⚡ Average Response Time: {metrics.get('average_response_time', 0)}ms")
+            click.echo(f"✅ Average Completion Rate: {metrics.get('average_completion_rate', 0)}%")
+            # Parity with the test-mode view. These two are printed only when
+            # the service actually reports them, so a missing field is never
+            # shown as a misleading 0.
+            if 'average_capacity' in metrics:
+                click.echo(f"📦 Average Capacity: {metrics['average_capacity']}%")
+            if 'miners_below_threshold' in metrics:
+                click.echo(f"⚠️  Miners Below Threshold: {metrics['miners_below_threshold']}")
+
+    except Exception as e:
+        click.echo(f"❌ Error getting SLA metrics: {str(e)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def sla_violations(test_mode):
+    """Get SLA violations across all miners"""
+    # test_mode: True when --test-mode is given; prints two canned violations
+    #            without contacting the service.
+    # Otherwise the list is fetched from {pool_hub_url}/sla/violations.
+    # Any exception is reported on stderr instead of being raised.
+
+    def _show(records):
+        # Single renderer shared by the mock and the live path.
+        click.echo("⚠️  SLA Violations:")
+        click.echo("=" * 50)
+        for rec in records:
+            click.echo(f"👤 Miner: {rec['miner_id']}")
+            click.echo(f"   Type: {rec['type']}")
+            click.echo(f"   Threshold: {rec['threshold']}")
+            click.echo(f"   Actual: {rec['actual']}")
+            click.echo(f"   Timestamp: {rec['timestamp']}")
+            click.echo("")
+
+    try:
+        if test_mode:
+            # Canned sample records
+            _show([
+                {
+                    "miner_id": "miner_001",
+                    "type": "response_time",
+                    "threshold": 1000,
+                    "actual": 1200,
+                    "timestamp": "2024-03-15T14:30:00Z"
+                },
+                {
+                    "miner_id": "miner_002",
+                    "type": "uptime",
+                    "threshold": 95.0,
+                    "actual": 92.5,
+                    "timestamp": "2024-03-15T13:45:00Z"
+                }
+            ])
+            return
+
+        # Live path: ask the pool-hub service
+        settings = get_config()
+        reply = requests.get(f"{settings.pool_hub_url}/sla/violations", timeout=30)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Failed to get violations: {reply.text}", err=True)
+            return
+
+        _show(reply.json())
+
+    except Exception as exc:
+        click.echo(f"❌ Error getting violations: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def capacity_snapshots(test_mode):
+    """Get capacity planning snapshots"""
+    # test_mode: True when --test-mode is given; prints two canned snapshots
+    #            without contacting the service.
+    # Otherwise the list is fetched from {pool_hub_url}/sla/capacity/snapshots.
+    # Any exception is reported on stderr instead of being raised.
+
+    def _show(entries):
+        # Single renderer shared by the mock and the live path.
+        click.echo("📊 Capacity Snapshots:")
+        click.echo("=" * 50)
+        for snap in entries:
+            click.echo(f"🕐 Timestamp: {snap['timestamp']}")
+            click.echo(f"   Total Capacity: {snap['total_capacity']} GPU")
+            click.echo(f"   Available: {snap['available_capacity']} GPU")
+            click.echo(f"   Utilization: {snap['utilization']}%")
+            click.echo(f"   Active Miners: {snap['active_miners']}")
+            click.echo("")
+
+    try:
+        if test_mode:
+            # Canned sample snapshots
+            _show([
+                {
+                    "timestamp": "2024-03-15T00:00:00Z",
+                    "total_capacity": 1250,
+                    "available_capacity": 320,
+                    "utilization": 74.4,
+                    "active_miners": 42
+                },
+                {
+                    "timestamp": "2024-03-14T00:00:00Z",
+                    "total_capacity": 1200,
+                    "available_capacity": 350,
+                    "utilization": 70.8,
+                    "active_miners": 40
+                }
+            ])
+            return
+
+        # Live path: ask the pool-hub service
+        settings = get_config()
+        reply = requests.get(f"{settings.pool_hub_url}/sla/capacity/snapshots", timeout=30)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Failed to get snapshots: {reply.text}", err=True)
+            return
+
+        _show(reply.json())
+
+    except Exception as exc:
+        click.echo(f"❌ Error getting snapshots: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def capacity_forecast(test_mode):
+    """Get capacity forecast"""
+    # test_mode: True when --test-mode is given; prints a canned 7-day
+    #            forecast without contacting the service.
+    # Otherwise the forecast is fetched from {pool_hub_url}/sla/capacity/forecast.
+    # Any exception is reported on stderr instead of being raised.
+
+    def _show(data):
+        # Single renderer shared by the mock and the live path.
+        click.echo("🔮 Capacity Forecast:")
+        click.echo("=" * 50)
+        click.echo(f"📅 Forecast Period: {data['forecast_days']} days")
+        click.echo(f"📊 Current Capacity: {data['current_capacity']} GPU")
+        click.echo(f"📈 Projected Capacity: {data['projected_capacity']} GPU")
+        click.echo(f"📊 Growth Rate: {data['growth_rate']}%")
+        click.echo("")
+        click.echo("Daily Projections:")
+        for point in data['daily_projections']:
+            click.echo(f"   Day {point['day']}: {point['capacity']} GPU")
+
+    try:
+        if test_mode:
+            # Canned sample forecast
+            _show({
+                "forecast_days": 7,
+                "current_capacity": 1250,
+                "projected_capacity": 1400,
+                "growth_rate": 12.0,
+                "daily_projections": [
+                    {"day": 1, "capacity": 1280},
+                    {"day": 2, "capacity": 1310},
+                    {"day": 3, "capacity": 1340},
+                    {"day": 7, "capacity": 1400}
+                ]
+            })
+            return
+
+        # Live path: ask the pool-hub service
+        settings = get_config()
+        reply = requests.get(f"{settings.pool_hub_url}/sla/capacity/forecast", timeout=30)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Failed to get forecast: {reply.text}", err=True)
+            return
+
+        _show(reply.json())
+
+    except Exception as exc:
+        click.echo(f"❌ Error getting forecast: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def capacity_recommendations(test_mode):
+    """Get scaling recommendations"""
+    # test_mode: True when --test-mode is given; prints two canned
+    #            recommendations without contacting the service.
+    # Otherwise the list is fetched from
+    # {pool_hub_url}/sla/capacity/recommendations.
+    # Any exception is reported on stderr instead of being raised.
+
+    def _show(items):
+        # Single renderer shared by the mock and the live path.
+        click.echo("💡 Capacity Recommendations:")
+        click.echo("=" * 50)
+        for rec in items:
+            click.echo(f"📌 Type: {rec['type']}")
+            click.echo(f"   Reason: {rec['reason']}")
+            click.echo(f"   Action: {rec['action']}")
+            click.echo(f"   Priority: {rec['priority']}")
+            click.echo("")
+
+    try:
+        if test_mode:
+            # Canned sample recommendations
+            _show([
+                {
+                    "type": "scale_up",
+                    "reason": "High utilization (>80%)",
+                    "action": "Add 50 GPU capacity",
+                    "priority": "high"
+                },
+                {
+                    "type": "optimize",
+                    "reason": "Imbalanced workload distribution",
+                    "action": "Rebalance miners across regions",
+                    "priority": "medium"
+                }
+            ])
+            return
+
+        # Live path: ask the pool-hub service
+        settings = get_config()
+        reply = requests.get(f"{settings.pool_hub_url}/sla/capacity/recommendations", timeout=30)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Failed to get recommendations: {reply.text}", err=True)
+            return
+
+        _show(reply.json())
+
+    except Exception as exc:
+        click.echo(f"❌ Error getting recommendations: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def billing_usage(test_mode):
+    """Get billing usage data"""
+    # test_mode: True when --test-mode is given; prints a canned usage report
+    #            without contacting the service.
+    # Otherwise the report is fetched from {pool_hub_url}/sla/billing/usage.
+    # Any exception is reported on stderr instead of being raised.
+    # NOTE(review): the ":.2f" formatting assumes numeric cost values - confirm
+    # the service never serialises costs as strings.
+
+    def _show(report):
+        # Single renderer shared by the mock and the live path.
+        click.echo("💰 Billing Usage:")
+        click.echo("=" * 50)
+        click.echo(f"📅 Period: {report['period_start']} to {report['period_end']}")
+        click.echo(f"⚡ Total GPU Hours: {report['total_gpu_hours']}")
+        click.echo(f"📞 Total API Calls: {report['total_api_calls']}")
+        click.echo(f"🖥️  Total Compute Hours: {report['total_compute_hours']}")
+        click.echo(f"💵 Total Cost: ${report['total_cost']:.2f}")
+        click.echo("")
+        click.echo("By Miner:")
+        for row in report['by_miner']:
+            click.echo(f"   {row['miner_id']}: {row['gpu_hours']} GPUh, ${row['cost']:.2f}")
+
+    try:
+        if test_mode:
+            # Canned sample report
+            _show({
+                "period_start": "2024-03-01T00:00:00Z",
+                "period_end": "2024-03-31T23:59:59Z",
+                "total_gpu_hours": 45678,
+                "total_api_calls": 1234567,
+                "total_compute_hours": 23456,
+                "total_cost": 12500.50,
+                "by_miner": [
+                    {"miner_id": "miner_001", "gpu_hours": 12000, "cost": 3280.50},
+                    {"miner_id": "miner_002", "gpu_hours": 8900, "cost": 2435.00}
+                ]
+            })
+            return
+
+        # Live path: ask the pool-hub service
+        settings = get_config()
+        reply = requests.get(f"{settings.pool_hub_url}/sla/billing/usage", timeout=30)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Failed to get billing usage: {reply.text}", err=True)
+            return
+
+        _show(reply.json())
+
+    except Exception as exc:
+        click.echo(f"❌ Error getting billing usage: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def billing_sync(test_mode):
+    """Trigger billing sync with coordinator-api"""
+    # test_mode: True when --test-mode is given; prints a simulated success
+    #            and returns without any network call.
+    # Otherwise POSTs to {pool_hub_url}/sla/billing/sync with a 60 s timeout.
+    # Any exception is reported on stderr instead of being raised.
+    try:
+        if test_mode:
+            click.echo("🔄 Billing sync triggered (test mode)")
+            click.echo("✅ Sync completed successfully")
+            return
+
+        # Live path: ask the pool-hub service to run the sync
+        settings = get_config()
+        endpoint = f"{settings.pool_hub_url}/sla/billing/sync"
+        reply = requests.post(endpoint, timeout=60)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Billing sync failed: {reply.text}", err=True)
+            return
+
+        outcome = reply.json()
+        click.echo("🔄 Billing sync triggered")
+        # Falls back to 'Success' when the reply carries no message field.
+        click.echo(f"✅ Sync completed: {outcome.get('message', 'Success')}")
+
+    except Exception as exc:
+        click.echo(f"❌ Error triggering billing sync: {str(exc)}", err=True)
+
+@pool_hub.command()
+@click.option('--test-mode', is_flag=True, help='Run in test mode')
+def collect_metrics(test_mode):
+    """Trigger SLA metrics collection"""
+    # test_mode: True when --test-mode is given; prints a simulated success
+    #            and returns without any network call.
+    # Otherwise POSTs to {pool_hub_url}/sla/metrics/collect with a 60 s timeout.
+    # Any exception is reported on stderr instead of being raised.
+    try:
+        if test_mode:
+            click.echo("📊 SLA metrics collection triggered (test mode)")
+            click.echo("✅ Collection completed successfully")
+            return
+
+        # Live path: ask the pool-hub service to start a collection run
+        settings = get_config()
+        endpoint = f"{settings.pool_hub_url}/sla/metrics/collect"
+        reply = requests.post(endpoint, timeout=60)
+
+        if reply.status_code != 200:
+            click.echo(f"❌ Metrics collection failed: {reply.text}", err=True)
+            return
+
+        outcome = reply.json()
+        click.echo("📊 SLA metrics collection triggered")
+        # Falls back to 'Success' when the reply carries no message field.
+        click.echo(f"✅ Collection completed: {outcome.get('message', 'Success')}")
+
+    except Exception as exc:
+        click.echo(f"❌ Error triggering metrics collection: {str(exc)}", err=True)
+
+# Configuration lookup used by the commands in this module
+def get_config():
+    """Get CLI configuration"""
+    # Returns whatever the external `config` module's get_config() returns.
+    # If importing it, or anything it triggers, raises ImportError, a
+    # stand-in object with local defaults is returned instead.
+    try:
+        # Aliased so the imported loader is not confused with this function.
+        from config import get_config as load_project_config
+        settings = load_project_config()
+    except ImportError:
+        # Fallback for testing
+        from types import SimpleNamespace
+        settings = SimpleNamespace(
+            pool_hub_url="http://localhost:8012",
+            api_key="test-api-key"
+        )
+    return settings
+
+# Script entry point: when the module is executed directly, hand control to
+# the click group so it parses the command line and dispatches a subcommand.
+if __name__ == "__main__":
+    pool_hub()