Files
aitbc/apps/global-infrastructure/main.py
AITBC System b033923756 chore: normalize file permissions across repository
- Remove executable permissions from configuration files (.editorconfig, .env.example, .gitignore)
- Remove executable permissions from documentation files (README.md, LICENSE, SECURITY.md)
- Remove executable permissions from web assets (HTML, CSS, JS files)
- Remove executable permissions from data files (JSON, SQL, YAML, requirements.txt)
- Remove executable permissions from source code files across all apps
- Add executable permissions to Python
2026-03-08 11:26:18 +01:00

603 lines
22 KiB
Python
Executable File

"""
Global Infrastructure Deployment Service for AITBC
Handles multi-region deployment, load balancing, and global optimization
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Any, List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI(
title="AITBC Global Infrastructure Service",
description="Global infrastructure deployment and multi-region optimization",
version="1.0.0"
)
# Data models
class Region(BaseModel):
region_id: str
name: str
location: str
endpoint: str
status: str # active, inactive, maintenance
capacity: int
current_load: float
latency_ms: float
compliance_level: str
class GlobalDeployment(BaseModel):
deployment_id: str
service_name: str
target_regions: List[str]
configuration: Dict[str, Any]
deployment_strategy: str # blue_green, canary, rolling
health_checks: List[str]
class LoadBalancer(BaseModel):
balancer_id: str
name: str
algorithm: str # round_robin, weighted, least_connections
target_regions: List[str]
health_check_interval: int
failover_threshold: int
class PerformanceMetrics(BaseModel):
region_id: str
timestamp: datetime
cpu_usage: float
memory_usage: float
network_io: float
disk_io: float
active_connections: int
response_time_ms: float
# In-memory storage (in production, use database)
global_regions: Dict[str, Dict] = {}
deployments: Dict[str, Dict] = {}
load_balancers: Dict[str, Dict] = {}
performance_metrics: Dict[str, List[Dict]] = {}
compliance_data: Dict[str, Dict] = {}
global_monitoring: Dict[str, Dict] = {}
@app.get("/")
async def root():
return {
"service": "AITBC Global Infrastructure Service",
"status": "running",
"timestamp": datetime.utcnow().isoformat(),
"version": "1.0.0"
}
@app.get("/health")
async def health_check():
return {
"status": "healthy",
"total_regions": len(global_regions),
"active_regions": len([r for r in global_regions.values() if r["status"] == "active"]),
"total_deployments": len(deployments),
"active_load_balancers": len([lb for lb in load_balancers.values() if lb["status"] == "active"])
}
@app.post("/api/v1/regions/register")
async def register_region(region: Region):
"""Register a new global region"""
if region.region_id in global_regions:
raise HTTPException(status_code=400, detail="Region already registered")
# Create region record
region_record = {
"region_id": region.region_id,
"name": region.name,
"location": region.location,
"endpoint": region.endpoint,
"status": region.status,
"capacity": region.capacity,
"current_load": region.current_load,
"latency_ms": region.latency_ms,
"compliance_level": region.compliance_level,
"created_at": datetime.utcnow().isoformat(),
"last_health_check": None,
"services_deployed": [],
"performance_history": []
}
global_regions[region.region_id] = region_record
logger.info(f"Region registered: {region.name} ({region.region_id})")
return {
"region_id": region.region_id,
"status": "registered",
"name": region.name,
"created_at": region_record["created_at"]
}
@app.get("/api/v1/regions")
async def list_regions():
"""List all registered regions"""
return {
"regions": list(global_regions.values()),
"total_regions": len(global_regions),
"active_regions": len([r for r in global_regions.values() if r["status"] == "active"])
}
@app.get("/api/v1/regions/{region_id}")
async def get_region(region_id: str):
"""Get detailed region information"""
if region_id not in global_regions:
raise HTTPException(status_code=404, detail="Region not found")
region = global_regions[region_id].copy()
# Add performance metrics
region["performance_metrics"] = performance_metrics.get(region_id, [])
# Add compliance data
region["compliance_data"] = compliance_data.get(region_id, {})
return region
@app.post("/api/v1/deployments/create")
async def create_deployment(deployment: GlobalDeployment):
"""Create a new global deployment"""
deployment_id = f"deploy_{int(datetime.utcnow().timestamp())}"
# Validate target regions
for region_id in deployment.target_regions:
if region_id not in global_regions:
raise HTTPException(status_code=400, detail=f"Region {region_id} not found")
# Create deployment record
deployment_record = {
"deployment_id": deployment_id,
"service_name": deployment.service_name,
"target_regions": deployment.target_regions,
"configuration": deployment.configuration,
"deployment_strategy": deployment.deployment_strategy,
"health_checks": deployment.health_checks,
"status": "pending",
"created_at": datetime.utcnow().isoformat(),
"started_at": None,
"completed_at": None,
"deployment_progress": {},
"rollback_available": False
}
deployments[deployment_id] = deployment_record
# Start async deployment
asyncio.create_task(execute_deployment(deployment_id))
logger.info(f"Deployment created: {deployment_id} for {deployment.service_name}")
return {
"deployment_id": deployment_id,
"status": "pending",
"service_name": deployment.service_name,
"target_regions": deployment.target_regions,
"created_at": deployment_record["created_at"]
}
@app.get("/api/v1/deployments/{deployment_id}")
async def get_deployment(deployment_id: str):
"""Get deployment status and details"""
if deployment_id not in deployments:
raise HTTPException(status_code=404, detail="Deployment not found")
return deployments[deployment_id]
@app.get("/api/v1/deployments")
async def list_deployments(status: Optional[str] = None):
"""List all deployments"""
deployment_list = list(deployments.values())
if status:
deployment_list = [d for d in deployment_list if d["status"] == status]
# Sort by creation date (most recent first)
deployment_list.sort(key=lambda x: x["created_at"], reverse=True)
return {
"deployments": deployment_list,
"total_deployments": len(deployment_list),
"status_filter": status
}
@app.post("/api/v1/load-balancers/create")
async def create_load_balancer(balancer: LoadBalancer):
"""Create a new load balancer"""
balancer_id = f"lb_{int(datetime.utcnow().timestamp())}"
# Validate target regions
for region_id in balancer.target_regions:
if region_id not in global_regions:
raise HTTPException(status_code=400, detail=f"Region {region_id} not found")
# Create load balancer record
balancer_record = {
"balancer_id": balancer_id,
"name": balancer.name,
"algorithm": balancer.algorithm,
"target_regions": balancer.target_regions,
"health_check_interval": balancer.health_check_interval,
"failover_threshold": balancer.failover_threshold,
"status": "active",
"created_at": datetime.utcnow().isoformat(),
"current_weights": {region_id: 1.0 for region_id in balancer.target_regions},
"health_status": {region_id: "healthy" for region_id in balancer.target_regions},
"total_requests": 0,
"failed_requests": 0
}
load_balancers[balancer_id] = balancer_record
# Start health checking
asyncio.create_task(start_health_monitoring(balancer_id))
logger.info(f"Load balancer created: {balancer_id} - {balancer.name}")
return {
"balancer_id": balancer_id,
"status": "active",
"name": balancer.name,
"algorithm": balancer.algorithm,
"created_at": balancer_record["created_at"]
}
@app.get("/api/v1/load-balancers")
async def list_load_balancers():
"""List all load balancers"""
return {
"load_balancers": list(load_balancers.values()),
"total_balancers": len(load_balancers),
"active_balancers": len([lb for lb in load_balancers.values() if lb["status"] == "active"])
}
@app.post("/api/v1/performance/metrics")
async def record_performance_metrics(metrics: PerformanceMetrics):
"""Record performance metrics for a region"""
metrics_record = {
"metrics_id": f"metrics_{int(datetime.utcnow().timestamp())}",
"region_id": metrics.region_id,
"timestamp": metrics.timestamp.isoformat(),
"cpu_usage": metrics.cpu_usage,
"memory_usage": metrics.memory_usage,
"network_io": metrics.network_io,
"disk_io": metrics.disk_io,
"active_connections": metrics.active_connections,
"response_time_ms": metrics.response_time_ms
}
if metrics.region_id not in performance_metrics:
performance_metrics[metrics.region_id] = []
performance_metrics[metrics.region_id].append(metrics_record)
# Keep only last 1000 records per region
if len(performance_metrics[metrics.region_id]) > 1000:
performance_metrics[metrics.region_id] = performance_metrics[metrics.region_id][-1000:]
# Update region performance history
if metrics.region_id in global_regions:
global_regions[metrics.region_id]["performance_history"].append({
"timestamp": metrics.timestamp.isoformat(),
"cpu_usage": metrics.cpu_usage,
"memory_usage": metrics.memory_usage,
"response_time_ms": metrics.response_time_ms
})
# Keep only last 100 records
if len(global_regions[metrics.region_id]["performance_history"]) > 100:
global_regions[metrics.region_id]["performance_history"] = global_regions[metrics.region_id]["performance_history"][-100:]
return {
"metrics_id": metrics_record["metrics_id"],
"status": "recorded",
"timestamp": metrics_record["timestamp"]
}
@app.get("/api/v1/performance/{region_id}")
async def get_region_performance(region_id: str, hours: int = 24):
"""Get performance metrics for a region"""
if region_id not in performance_metrics:
raise HTTPException(status_code=404, detail="No performance data for region")
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
recent_metrics = [
m for m in performance_metrics[region_id]
if datetime.fromisoformat(m["timestamp"]) > cutoff_time
]
# Calculate statistics
if recent_metrics:
avg_cpu = sum(m["cpu_usage"] for m in recent_metrics) / len(recent_metrics)
avg_memory = sum(m["memory_usage"] for m in recent_metrics) / len(recent_metrics)
avg_response_time = sum(m["response_time_ms"] for m in recent_metrics) / len(recent_metrics)
else:
avg_cpu = avg_memory = avg_response_time = 0.0
return {
"region_id": region_id,
"period_hours": hours,
"metrics": recent_metrics,
"statistics": {
"average_cpu_usage": round(avg_cpu, 2),
"average_memory_usage": round(avg_memory, 2),
"average_response_time_ms": round(avg_response_time, 2),
"total_samples": len(recent_metrics)
},
"generated_at": datetime.utcnow().isoformat()
}
@app.get("/api/v1/compliance/{region_id}")
async def get_region_compliance(region_id: str):
"""Get compliance information for a region"""
if region_id not in global_regions:
raise HTTPException(status_code=404, detail="Region not found")
# Mock compliance data (in production, this would be from actual compliance systems)
compliance_info = {
"region_id": region_id,
"region_name": global_regions[region_id]["name"],
"compliance_level": global_regions[region_id]["compliance_level"],
"certifications": ["SOC2", "ISO27001", "GDPR"],
"data_residency": "compliant",
"last_audit": (datetime.utcnow() - timedelta(days=90)).isoformat(),
"next_audit": (datetime.utcnow() + timedelta(days=275)).isoformat(),
"regulations": ["GDPR", "CCPA", "PDPA"],
"data_protection": "end-to-end-encryption",
"access_controls": "role-based-access",
"audit_logging": "enabled"
}
return compliance_info
@app.get("/api/v1/global/dashboard")
async def get_global_dashboard():
"""Get global infrastructure dashboard"""
# Calculate global statistics
total_capacity = sum(r["capacity"] for r in global_regions.values())
total_load = sum(r["current_load"] for r in global_regions.values())
avg_latency = sum(r["latency_ms"] for r in global_regions.values()) / len(global_regions) if global_regions else 0
# Deployment statistics
deployment_stats = {
"total": len(deployments),
"pending": len([d for d in deployments.values() if d["status"] == "pending"]),
"in_progress": len([d for d in deployments.values() if d["status"] == "in_progress"]),
"completed": len([d for d in deployments.values() if d["status"] == "completed"]),
"failed": len([d for d in deployments.values() if d["status"] == "failed"])
}
# Performance summary
performance_summary = {}
for region_id, metrics_list in performance_metrics.items():
if metrics_list:
latest_metrics = metrics_list[-1]
performance_summary[region_id] = {
"cpu_usage": latest_metrics["cpu_usage"],
"memory_usage": latest_metrics["memory_usage"],
"response_time_ms": latest_metrics["response_time_ms"],
"active_connections": latest_metrics["active_connections"]
}
return {
"dashboard": {
"infrastructure": {
"total_regions": len(global_regions),
"active_regions": len([r for r in global_regions.values() if r["status"] == "active"]),
"total_capacity": total_capacity,
"current_load": total_load,
"utilization_percentage": round((total_load / total_capacity * 100) if total_capacity > 0 else 0, 2),
"average_latency_ms": round(avg_latency, 2)
},
"deployments": deployment_stats,
"load_balancers": {
"total": len(load_balancers),
"active": len([lb for lb in load_balancers.values() if lb["status"] == "active"])
},
"performance": performance_summary,
"compliance": {
"compliant_regions": len([r for r in global_regions.values() if r["compliance_level"] == "full"]),
"partial_compliance": len([r for r in global_regions.values() if r["compliance_level"] == "partial"])
}
},
"generated_at": datetime.utcnow().isoformat()
}
# Core deployment and load balancing functions
async def execute_deployment(deployment_id: str):
"""Execute a global deployment"""
deployment = deployments[deployment_id]
deployment["status"] = "in_progress"
deployment["started_at"] = datetime.utcnow().isoformat()
try:
for region_id in deployment["target_regions"]:
deployment["deployment_progress"][region_id] = {
"status": "deploying",
"started_at": datetime.utcnow().isoformat(),
"progress": 0
}
# Simulate deployment process
await simulate_deployment_step(region_id, deployment_id)
deployment["deployment_progress"][region_id].update({
"status": "completed",
"completed_at": datetime.utcnow().isoformat(),
"progress": 100
})
# Update region services
if region_id in global_regions:
if deployment["service_name"] not in global_regions[region_id]["services_deployed"]:
global_regions[region_id]["services_deployed"].append(deployment["service_name"])
deployment["status"] = "completed"
deployment["completed_at"] = datetime.utcnow().isoformat()
logger.info(f"Deployment completed: {deployment_id}")
except Exception as e:
deployment["status"] = "failed"
deployment["error"] = str(e)
logger.error(f"Deployment failed: {deployment_id} - {str(e)}")
async def simulate_deployment_step(region_id: str, deployment_id: str):
"""Simulate deployment step for demo"""
deployment = deployments[deployment_id]
# Simulate deployment progress
for progress in range(0, 101, 10):
if region_id in deployment["deployment_progress"]:
deployment["deployment_progress"][region_id]["progress"] = progress
await asyncio.sleep(0.1) # Simulate work
async def start_health_monitoring(balancer_id: str):
"""Start health monitoring for a load balancer"""
balancer = load_balancers[balancer_id]
while balancer["status"] == "active":
try:
# Check health of target regions
for region_id in balancer["target_regions"]:
if region_id in global_regions:
region = global_regions[region_id]
# Simulate health check (in production, this would be actual health checks)
is_healthy = region["status"] == "active" and region["current_load"] < region["capacity"] * 0.9
balancer["health_status"][region_id] = "healthy" if is_healthy else "unhealthy"
# Update load balancer weights based on performance
update_load_balancer_weights(balancer_id)
await asyncio.sleep(balancer["health_check_interval"])
except Exception as e:
logger.error(f"Health monitoring error for {balancer_id}: {str(e)}")
await asyncio.sleep(10)
def update_load_balancer_weights(balancer_id: str):
"""Update load balancer weights based on region performance"""
balancer = load_balancers[balancer_id]
if balancer["algorithm"] == "weighted":
# Calculate weights based on capacity and current load
for region_id in balancer["target_regions"]:
if region_id in global_regions:
region = global_regions[region_id]
# Weight based on available capacity
available_capacity = region["capacity"] - region["current_load"]
total_available = sum(
global_regions[r]["capacity"] - global_regions[r]["current_load"]
for r in balancer["target_regions"]
if r in global_regions
)
if total_available > 0:
weight = available_capacity / total_available
balancer["current_weights"][region_id] = round(weight, 3)
# Background task for global monitoring
async def global_monitoring_task():
"""Background task for global infrastructure monitoring"""
while True:
await asyncio.sleep(60) # Monitor every minute
# Update global monitoring data
global_monitoring["last_update"] = datetime.utcnow().isoformat()
global_monitoring["total_requests"] = sum(lb.get("total_requests", 0) for lb in load_balancers.values())
global_monitoring["failed_requests"] = sum(lb.get("failed_requests", 0) for lb in load_balancers.values())
# Check for regions that need attention
for region_id, region in global_regions.items():
if region["current_load"] > region["capacity"] * 0.8:
logger.warning(f"High load detected in region {region_id}: {region['current_load']}/{region['capacity']}")
if region["latency_ms"] > 500:
logger.warning(f"High latency detected in region {region_id}: {region['latency_ms']}ms")
# Initialize with some default regions
@app.on_event("startup")
async def startup_event():
logger.info("Starting AITBC Global Infrastructure Service")
# Initialize default regions
default_regions = [
{
"region_id": "us-east-1",
"name": "US East (N. Virginia)",
"location": "North America",
"endpoint": "https://us-east-1.api.aitbc.dev",
"status": "active",
"capacity": 10000,
"current_load": 3500,
"latency_ms": 45,
"compliance_level": "full"
},
{
"region_id": "eu-west-1",
"name": "EU West (Ireland)",
"location": "Europe",
"endpoint": "https://eu-west-1.api.aitbc.dev",
"status": "active",
"capacity": 8000,
"current_load": 2800,
"latency_ms": 38,
"compliance_level": "full"
},
{
"region_id": "ap-southeast-1",
"name": "AP Southeast (Singapore)",
"location": "Asia Pacific",
"endpoint": "https://ap-southeast-1.api.aitbc.dev",
"status": "active",
"capacity": 6000,
"current_load": 2200,
"latency_ms": 62,
"compliance_level": "partial"
}
]
for region_data in default_regions:
region = Region(**region_data)
region_record = {
"region_id": region.region_id,
"name": region.name,
"location": region.location,
"endpoint": region.endpoint,
"status": region.status,
"capacity": region.capacity,
"current_load": region.current_load,
"latency_ms": region.latency_ms,
"compliance_level": region.compliance_level,
"created_at": datetime.utcnow().isoformat(),
"last_health_check": None,
"services_deployed": [],
"performance_history": []
}
global_regions[region.region_id] = region_record
# Start global monitoring
asyncio.create_task(global_monitoring_task())
@app.on_event("shutdown")
async def shutdown_event():
logger.info("Shutting down AITBC Global Infrastructure Service")
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8017, log_level="info")