```
chore: enhance .gitignore and remove obsolete documentation files - Reorganize .gitignore with categorized sections for better maintainability - Add comprehensive ignore patterns for Python, Node.js, databases, logs, and build artifacts - Add project-specific ignore rules for coordinator, explorer, and deployment files - Remove outdated documentation: BITCOIN-WALLET-SETUP.md, LOCAL_ASSETS_SUMMARY.md, README-CONTAINER-DEPLOYMENT.md, README-DOMAIN-DEPLOYMENT.md ```
This commit is contained in:
76
.windsurf/skills/deploy-production/SKILL.md
Normal file
76
.windsurf/skills/deploy-production/SKILL.md
Normal file
@@ -0,0 +1,76 @@
|
||||
---
|
||||
name: deploy-production
|
||||
description: Automated production deployment workflow for AITBC blockchain components
|
||||
version: 1.0.0
|
||||
author: Cascade
|
||||
tags: [deployment, production, blockchain, aitbc]
|
||||
---
|
||||
|
||||
# Production Deployment Skill
|
||||
|
||||
This skill provides a standardized workflow for deploying AITBC components to production environments.
|
||||
|
||||
## Overview
|
||||
|
||||
The production deployment skill ensures safe, consistent, and verifiable deployments of all AITBC stack components including:
|
||||
- Coordinator services
|
||||
- Blockchain node
|
||||
- Miner daemon
|
||||
- Web applications
|
||||
- Infrastructure components
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Production server access configured
|
||||
- SSL certificates installed
|
||||
- Environment variables set
|
||||
- Backup procedures in place
|
||||
- Monitoring systems active
|
||||
|
||||
## Deployment Steps
|
||||
|
||||
### 1. Pre-deployment Checks
|
||||
- Run health checks on all services
|
||||
- Verify backup integrity
|
||||
- Check disk space and resources
|
||||
- Validate configuration files
|
||||
- Review recent changes
|
||||
|
||||
### 2. Environment Preparation
|
||||
- Update dependencies
|
||||
- Build new artifacts
|
||||
- Run smoke tests
|
||||
- Prepare rollback plan
|
||||
|
||||
### 3. Deployment Execution
|
||||
- Stop services gracefully
|
||||
- Deploy new code
|
||||
- Update configurations
|
||||
- Restart services
|
||||
- Verify health status
|
||||
|
||||
### 4. Post-deployment Verification
|
||||
- Run integration tests
|
||||
- Check API endpoints
|
||||
- Verify blockchain sync
|
||||
- Monitor system metrics
|
||||
- Validate user access
|
||||
|
||||
## Supporting Files
|
||||
|
||||
- `pre-deploy-checks.sh` - Automated pre-deployment validation
|
||||
- `environment-template.env` - Production environment template
|
||||
- `rollback-steps.md` - Emergency rollback procedures
|
||||
- `health-check.py` - Service health verification script
|
||||
|
||||
## Usage
|
||||
|
||||
This skill is automatically invoked when you request production deployment. You can also manually invoke it by mentioning "deploy production" or "production deployment".
|
||||
|
||||
## Safety Features
|
||||
|
||||
- Automatic rollback on failure
|
||||
- Service health monitoring
|
||||
- Configuration validation
|
||||
- Backup verification
|
||||
- Rollback checkpoint creation
|
||||
238
.windsurf/skills/deploy-production/health-check.py
Executable file
238
.windsurf/skills/deploy-production/health-check.py
Executable file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AITBC Production Health Check Script
|
||||
Verifies the health of all AITBC services after deployment
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# Configuration
|
||||
# Configuration: one entry per HTTP-reachable service, keyed by service name.
# check_service() reads these keys from each entry:
#   url              - endpoint to request (required)
#   method           - "POST" sends `payload` as JSON; anything else is a GET
#   payload          - JSON body for POST requests
#   expected_status  - HTTP status code that counts as healthy
#   timeout          - request timeout in seconds
SERVICES = {
    "coordinator": dict(
        url="http://localhost:8080/health",
        expected_status=200,
        timeout=10,
    ),
    # JSON-RPC endpoint: probed by asking for the current block number.
    "blockchain-node": dict(
        url="http://localhost:8545",
        method="POST",
        payload=dict(
            jsonrpc="2.0",
            method="eth_blockNumber",
            params=[],
            id=1,
        ),
        expected_status=200,
        timeout=10,
    ),
    "dashboard": dict(
        url="https://aitbc.io/health",
        expected_status=200,
        timeout=10,
    ),
    "api": dict(
        url="https://api.aitbc.io/v1/status",
        expected_status=200,
        timeout=10,
    ),
    "miner": dict(
        url="http://localhost:8081/api/status",
        expected_status=200,
        timeout=10,
    ),
}
|
||||
|
||||
# Colors for output
|
||||
class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    GREEN = '\033[92m'   # success markers
    RED = '\033[91m'     # error markers
    YELLOW = '\033[93m'  # warning markers
    BLUE = '\033[94m'    # informational markers
    ENDC = '\033[0m'     # reset: ends the colored span
|
||||
|
||||
def print_status(message: str, status: str = "INFO"):
    """Print *message* to stdout with a colored marker and a timestamp.

    Args:
        message: Text to print after the timestamp.
        status: "SUCCESS", "ERROR" or "WARNING" select the green, red or
            yellow marker; any other value is printed as blue info.
    """
    markers = {
        "SUCCESS": (Colors.GREEN, "✓"),
        "ERROR": (Colors.RED, "✗"),
        "WARNING": (Colors.YELLOW, "⚠"),
    }
    # Unknown statuses fall back to the informational marker.
    color, symbol = markers.get(status, (Colors.BLUE, "ℹ"))
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{color}[{symbol}]{Colors.ENDC} {now} - {message}")
|
||||
|
||||
def check_service(name: str, config: Dict) -> Tuple[bool, str]:
    """Probe one HTTP service and report whether it looks healthy.

    Args:
        name: Service name. "blockchain-node" and "coordinator" get extra
            validation of the response body; every other name is judged on
            the HTTP status code alone.
        config: Service settings. "url" is required; "method" (default
            "GET"), "payload" (JSON body for POST, default ``{}``),
            "expected_status" (default 200) and "timeout" (seconds,
            default 10) are optional.

    Returns:
        ``(ok, detail)`` where *detail* is a short human-readable message.
        The function never raises: every failure, including a missing
        "url" key, is reported as ``(False, <reason>)``.
    """
    try:
        method = config.get('method', 'GET')
        timeout = config.get('timeout', 10)
        expected_status = config.get('expected_status', 200)

        if method == 'POST':
            # `json=` serializes the payload and sets the JSON Content-Type
            # header itself, so no explicit header is needed.
            response = requests.post(
                config['url'],
                json=config.get('payload', {}),
                timeout=timeout,
            )
        else:
            response = requests.get(config['url'], timeout=timeout)

        if response.status_code != expected_status:
            return False, f"HTTP {response.status_code}"

        # Only these two services have a body format we know how to verify.
        if name not in ("blockchain-node", "coordinator"):
            return True, f"Status: {response.status_code}"

        try:
            data = response.json()
        except ValueError:
            # A body that is not JSON at all (e.g. an HTML error page).
            return False, "Invalid JSON response"
        if not isinstance(data, dict):
            return False, "Invalid response format"

        if name == "blockchain-node":
            if 'result' in data:
                try:
                    # The block number is returned as a hex string.
                    block_number = int(data['result'], 16)
                except (TypeError, ValueError):
                    return False, "Invalid response format"
                return True, f"Block number: {block_number}"
            if 'error' in data:
                # Surface the node's own JSON-RPC error instead of hiding it
                # behind a generic format complaint.
                error = data['error']
                detail = error.get('message', error) if isinstance(error, dict) else error
                return False, f"RPC error: {detail}"
            return False, "Invalid response format"

        # name == "coordinator"
        if data.get('status') == 'healthy':
            return True, f"Version: {data.get('version', 'unknown')}"
        return False, f"Status: {data.get('status')}"

    except requests.exceptions.Timeout:
        return False, "Timeout"
    except requests.exceptions.ConnectionError:
        return False, "Connection refused"
    except Exception as e:
        # Top-level boundary of a single check: report, never crash the run.
        return False, str(e)
|
||||
|
||||
def check_database() -> Tuple[bool, str]:
    """Check PostgreSQL connectivity by running ``SELECT 1``.

    Connection settings come from the standard libpq environment variables
    ``PGHOST``, ``PGDATABASE``, ``PGUSER`` and ``PGPASSWORD``. When one is
    unset, the previous hard-coded value is used instead (localhost /
    aitbc_prod / postgres / placeholder password), so existing setups keep
    working while real credentials can stay out of the source file.

    Returns:
        ``(True, "Database connected")`` on success, otherwise
        ``(False, <error text>)``. A missing ``psycopg2`` package is
        reported the same way rather than raised.
    """
    import os

    conn = None
    try:
        import psycopg2

        conn = psycopg2.connect(
            host=os.environ.get("PGHOST", "localhost"),
            dbname=os.environ.get("PGDATABASE", "aitbc_prod"),
            user=os.environ.get("PGUSER", "postgres"),
            password=os.environ.get("PGPASSWORD", "your_password"),
            # Fail fast instead of hanging the whole health check when the
            # server is unreachable.
            connect_timeout=5,
        )
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT 1")
        finally:
            cursor.close()
        return True, "Database connected"
    except Exception as e:
        return False, str(e)
    finally:
        # Release the connection even when the query itself failed.
        if conn is not None:
            conn.close()
|
||||
|
||||
def check_redis() -> Tuple[bool, str]:
    """Check Redis connectivity by sending ``PING`` to localhost:6379, db 0.

    Returns:
        ``(True, "Redis connected")`` on success, otherwise
        ``(False, <error text>)``. A missing ``redis`` package is reported
        the same way rather than raised.
    """
    try:
        import redis

        client = redis.Redis(
            host='localhost',
            port=6379,
            db=0,
            # Without timeouts an unresponsive server would block the whole
            # health check indefinitely.
            socket_connect_timeout=5,
            socket_timeout=5,
        )
        try:
            client.ping()
        finally:
            # Drop the pooled socket so the probe leaves nothing open.
            client.connection_pool.disconnect()
        return True, "Redis connected"
    except Exception as e:
        return False, str(e)
|
||||
|
||||
def check_disk_space(path: str = "/", max_percent: float = 80.0) -> Tuple[bool, str]:
    """Check that the filesystem holding *path* is not too full.

    Args:
        path: Any path on the filesystem to inspect. Defaults to the root
            filesystem.
        max_percent: The check passes while used space is strictly below
            this percentage. Defaults to 80.

    Returns:
        ``(True, "Disk usage: N%")`` when below the threshold,
        ``(False, "Disk usage critical: N%")`` otherwise, or
        ``(False, <error text>)`` when the usage cannot be read (for
        example, *path* does not exist). The function never raises, which
        matches the other infrastructure checks.
    """
    import shutil

    try:
        usage = shutil.disk_usage(path)
    except OSError as e:
        return False, str(e)

    if usage.total == 0:
        # Some virtual filesystems report a zero size; avoid dividing by it.
        return False, f"Disk usage unavailable for {path}"

    percent_used = (usage.used / usage.total) * 100
    if percent_used < max_percent:
        return True, f"Disk usage: {percent_used:.1f}%"
    return False, f"Disk usage critical: {percent_used:.1f}%"
|
||||
|
||||
def check_ssl_certificates(
    hostname: str = "aitbc.io",
    port: int = 443,
    min_days: int = 7,
    timeout: float = 10.0,
) -> Tuple[bool, str]:
    """Check that the TLS certificate served by *hostname* is not about to expire.

    Args:
        hostname: Host to connect to; also used for SNI and for hostname
            verification by the default SSL context.
        port: TCP port of the TLS endpoint.
        min_days: The check passes only when the certificate is valid for
            strictly more than this many days.
        timeout: Socket timeout in seconds for connecting and handshaking.

    Returns:
        ``(True, "SSL valid for N days")`` or
        ``(False, "SSL expires in N days")``. Connection and verification
        errors are returned as ``(False, <error text>)``, never raised.
    """
    import socket
    import ssl
    import time

    try:
        context = ssl.create_default_context()
        # The timeout keeps an unreachable host from hanging the whole run.
        with socket.create_connection((hostname, port), timeout=timeout) as sock:
            with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                cert = ssock.getpeercert()

        # notAfter is expressed in GMT; cert_time_to_seconds converts it to
        # an epoch timestamp, so comparing with time.time() is correct in
        # any local time zone.
        expires_at = ssl.cert_time_to_seconds(cert['notAfter'])
        days_until_expiry = int((expires_at - time.time()) // 86400)

        if days_until_expiry > min_days:
            return True, f"SSL valid for {days_until_expiry} days"
        return False, f"SSL expires in {days_until_expiry} days"
    except Exception as e:
        return False, str(e)
|
||||
|
||||
def main():
    """Run every health check, print a report and exit with a status code.

    Service checks come from SERVICES; infrastructure checks cover the
    database, Redis, disk space and SSL certificate. The process exits with
    0 when everything passed and 1 otherwise. Every failed check, whether a
    service or an infrastructure component, is named in the final summary.
    """
    print_status("Starting AITBC Production Health Check", "INFO")
    print("=" * 60)

    failed_checks = []

    # Check all services
    print_status("\n=== Service Health Checks ===")
    for name, config in SERVICES.items():
        success, message = check_service(name, config)
        if success:
            print_status(f"{name}: {message}", "SUCCESS")
        else:
            print_status(f"{name}: {message}", "ERROR")
            failed_checks.append(name)

    # Check infrastructure components
    print_status("\n=== Infrastructure Checks ===")
    infrastructure_checks = (
        ("Database", check_database),
        ("Redis", check_redis),
        ("Disk", check_disk_space),
        ("SSL", check_ssl_certificates),
    )
    for label, check in infrastructure_checks:
        try:
            success, message = check()
        except Exception as e:
            # One broken probe must not abort the remaining checks.
            success, message = False, str(e)
        if success:
            print_status(f"{label}: {message}", "SUCCESS")
        else:
            print_status(f"{label}: {message}", "ERROR")
            failed_checks.append(label)

    # Summary
    print("\n" + "=" * 60)
    if not failed_checks:
        print_status("All checks passed! System is healthy.", "SUCCESS")
        sys.exit(0)

    print_status(f"Health check failed! Failed checks: {', '.join(failed_checks)}", "ERROR")
    print_status("Please check the logs and investigate the issues.", "WARNING")
    sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
102
.windsurf/skills/deploy-production/pre-deploy-checks.sh
Executable file
102
.windsurf/skills/deploy-production/pre-deploy-checks.sh
Executable file
@@ -0,0 +1,102 @@
|
||||
#!/bin/bash

# Pre-deployment checks for AITBC production deployment
# This script validates system readiness before deployment.
#
# Exit status: 0 when no check failed (warnings are allowed),
#              1 when at least one check failed.

set -e

echo "=== AITBC Production Pre-deployment Checks ==="

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Counters used for the final verdict
FAILURES=0
WARNINGS=0

# Print a pass/fail line based on the exit status of the command that ran
# immediately before the call; a non-zero status aborts the script.
check_status() {
    local status=$?
    if [ "$status" -eq 0 ]; then
        echo -e "${GREEN}✓${NC} $1"
    else
        echo -e "${RED}✗${NC} $1"
        exit 1
    fi
}

# Print a non-fatal warning and count it.
warning() {
    echo -e "${YELLOW}⚠${NC} $1"
    WARNINGS=$((WARNINGS + 1))
}

# Print a failed check and count it; the script keeps going so that every
# problem is reported in a single run.
failure() {
    echo -e "${RED}✗${NC} $1"
    FAILURES=$((FAILURES + 1))
}

# 1. Check disk space
echo -e "\n1. Checking disk space..."
# -P forces one line per filesystem so the awk column lookup is reliable.
DISK_USAGE=$(df -P / | awk 'NR==2 {gsub(/%/, "", $5); print $5}')
if [ "$DISK_USAGE" -lt 80 ]; then
    check_status "Disk space usage: ${DISK_USAGE}%"
else
    warning "Disk space usage is high: ${DISK_USAGE}%"
fi

# 2. Check memory usage
echo -e "\n2. Checking memory usage..."
MEM_AVAILABLE=$(free -m | awk 'NR==2{printf "%.0f", $7}')
if [ "$MEM_AVAILABLE" -gt 1024 ]; then
    check_status "Available memory: ${MEM_AVAILABLE}MB"
else
    warning "Low memory available: ${MEM_AVAILABLE}MB"
fi

# 3. Check service status
echo -e "\n3. Checking critical services..."
services=("nginx" "docker" "postgresql")
for service in "${services[@]}"; do
    if systemctl is-active --quiet "$service"; then
        check_status "$service is running"
    else
        # A stopped critical service must block the deployment.
        failure "$service is not running"
    fi
done

# 4. Check SSL certificates
echo -e "\n4. Checking SSL certificates..."
CERT_FILE="/etc/letsencrypt/live/$(hostname)/fullchain.pem"
if [ -f "$CERT_FILE" ]; then
    EXPIRY=$(openssl x509 -in "$CERT_FILE" -noout -enddate | cut -d= -f2)
    # -checkend N succeeds only if the certificate is still valid N seconds
    # from now (604800 s = 7 days).
    if openssl x509 -in "$CERT_FILE" -noout -checkend 604800 >/dev/null; then
        check_status "SSL certificate valid until: $EXPIRY"
    else
        failure "SSL certificate expired or expires within 7 days: $EXPIRY"
    fi
else
    warning "SSL certificate not found"
fi

# 5. Check backup
echo -e "\n5. Checking recent backup..."
BACKUP_DIR="/var/backups/aitbc"
if [ -d "$BACKUP_DIR" ]; then
    # Newest entry first; taking the whole line keeps names with spaces intact.
    LATEST_BACKUP=$(ls -t -- "$BACKUP_DIR" | head -n 1)
    if [ -n "$LATEST_BACKUP" ]; then
        check_status "Latest backup: $LATEST_BACKUP"
    else
        warning "No recent backup found"
    fi
else
    warning "Backup directory not found"
fi

# 6. Check environment variables
echo -e "\n6. Checking environment configuration..."
if [ -f "/etc/environment" ] && grep -q "AITBC_ENV=production" /etc/environment; then
    check_status "Production environment configured"
else
    warning "Production environment not set"
fi

# 7. Check ports
echo -e "\n7. Checking required ports..."
# Query the listening sockets once; prefer ss and fall back to netstat.
if command -v ss >/dev/null 2>&1; then
    LISTENING=$(ss -tuln || true)
elif command -v netstat >/dev/null 2>&1; then
    LISTENING=$(netstat -tuln || true)
else
    LISTENING=""
    warning "Neither ss nor netstat is installed; ports cannot be inspected"
fi
ports=("80" "443" "8080" "8545")
for port in "${ports[@]}"; do
    if grep -Eq ":${port}[[:space:]]" <<< "$LISTENING"; then
        check_status "Port $port is listening"
    else
        warning "Port $port is not listening"
    fi
done

echo -e "\n=== Pre-deployment checks completed ==="
if [ "$FAILURES" -gt 0 ]; then
    echo -e "${RED}Not ready for deployment: ${FAILURES} check(s) failed, ${WARNINGS} warning(s).${NC}"
    exit 1
fi
if [ "$WARNINGS" -gt 0 ]; then
    echo -e "${YELLOW}${WARNINGS} warning(s) reported - review them before deploying.${NC}"
fi
echo -e "${GREEN}Ready for deployment!${NC}"
|
||||
187
.windsurf/skills/deploy-production/rollback-steps.md
Normal file
187
.windsurf/skills/deploy-production/rollback-steps.md
Normal file
@@ -0,0 +1,187 @@
|
||||
# Production Rollback Procedures
|
||||
|
||||
## Emergency Rollback Guide
|
||||
|
||||
Use these procedures when a deployment causes critical issues in production.
|
||||
|
||||
### Immediate Actions (First 5 minutes)
|
||||
|
||||
1. **Assess the Impact**
|
||||
- Check monitoring dashboards
|
||||
- Review error logs
|
||||
- Identify affected services
|
||||
- Determine if rollback is necessary
|
||||
|
||||
2. **Communicate**
|
||||
- Notify team in #production-alerts
|
||||
- Post status on status page if needed
|
||||
- Document start time of incident
|
||||
|
||||
### Automated Rollback (if available)
|
||||
|
||||
```bash
|
||||
# Quick rollback to previous version
|
||||
./scripts/rollback-to-previous.sh
|
||||
|
||||
# Rollback to specific version
|
||||
./scripts/rollback-to-version.sh v1.2.3
|
||||
```
|
||||
|
||||
### Manual Rollback Steps
|
||||
|
||||
#### 1. Stop Current Services
|
||||
```bash
|
||||
# Stop all AITBC services
|
||||
sudo systemctl stop aitbc-coordinator
|
||||
sudo systemctl stop aitbc-node
|
||||
sudo systemctl stop aitbc-miner
|
||||
sudo systemctl stop aitbc-dashboard
|
||||
sudo docker-compose down
|
||||
```
|
||||
|
||||
#### 2. Restore Previous Code
|
||||
```bash
|
||||
# Get previous deployment tag
|
||||
git tag --sort=-version:refname | head -n 5
|
||||
|
||||
# Check out the previous stable version (replace v1.2.3 with the tag chosen from the list above)
|
||||
git checkout v1.2.3
|
||||
|
||||
# Rebuild if necessary
|
||||
docker-compose build --no-cache
|
||||
```
|
||||
|
||||
#### 3. Restore Database (if needed)
|
||||
```bash
|
||||
# List available backups
|
||||
aws s3 ls s3://aitbc-backups/database/
|
||||
|
||||
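# Download the chosen backup (replace <backup-file> with a name from the listing above)
aws s3 cp s3://aitbc-backups/database/<backup-file> latest_backup.dump

# Restore the downloaded backup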
|
||||
pg_restore -h localhost -U postgres -d aitbc_prod latest_backup.dump
|
||||
```
|
||||
|
||||
#### 4. Restore Configuration
|
||||
```bash
|
||||
# Restore from backup
|
||||
cp /etc/aitbc/backup/config.yaml /etc/aitbc/config.yaml
|
||||
cp /etc/aitbc/backup/.env /etc/aitbc/.env
|
||||
```
|
||||
|
||||
#### 5. Restart Services
|
||||
```bash
|
||||
# Start services in correct order
|
||||
sudo systemctl start aitbc-coordinator
|
||||
sleep 10
|
||||
sudo systemctl start aitbc-node
|
||||
sleep 10
|
||||
sudo systemctl start aitbc-miner
|
||||
sleep 10
|
||||
sudo systemctl start aitbc-dashboard
|
||||
```
|
||||
|
||||
#### 6. Verify Rollback
|
||||
```bash
|
||||
# Check service status
|
||||
./scripts/health-check.sh
|
||||
|
||||
# Run smoke tests
|
||||
./scripts/smoke-test.sh
|
||||
|
||||
# Verify blockchain sync
|
||||
curl -X POST http://localhost:8545 -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}'
|
||||
```
|
||||
|
||||
### Database-Specific Rollbacks
|
||||
|
||||
#### Partial Data Rollback
|
||||
```bash
|
||||
# Create backup before changes
|
||||
pg_dump -h localhost -U postgres aitbc_prod > pre-rollback-backup.sql
|
||||
|
||||
# Rollback specific tables
|
||||
psql -h localhost -U postgres -d aitbc_prod < rollback-tables.sql
|
||||
```
|
||||
|
||||
#### Migration Rollback
|
||||
```bash
|
||||
# Check migration status
|
||||
./scripts/migration-status.sh
|
||||
|
||||
# Rollback last migration
|
||||
./scripts/rollback-migration.sh
|
||||
```
|
||||
|
||||
### Service-Specific Rollbacks
|
||||
|
||||
#### Coordinator Service
|
||||
```bash
|
||||
# Restore coordinator state
|
||||
sudo systemctl stop aitbc-coordinator
|
||||
cp /var/lib/aitbc/coordinator/backup/state.db /var/lib/aitbc/coordinator/
|
||||
sudo systemctl start aitbc-coordinator
|
||||
```
|
||||
|
||||
#### Blockchain Node
|
||||
```bash
|
||||
# Reset to the last stable block (replace 123456 with the height of the last known-good block)
|
||||
sudo systemctl stop aitbc-node
|
||||
aitbc-node --reset-to-block 123456
|
||||
sudo systemctl start aitbc-node
|
||||
```
|
||||
|
||||
#### Mining Operations
|
||||
```bash
|
||||
# Stop mining immediately
|
||||
curl -X POST http://localhost:8080/api/mining/stop
|
||||
|
||||
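# Reset mining state (WARNING: FLUSHDB deletes every key in the currently selected Redis database, not only mining state)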
|
||||
redis-cli FLUSHDB
|
||||
```
|
||||
|
||||
### Verification Checklist
|
||||
|
||||
- [ ] All services running
|
||||
- [ ] Database connectivity
|
||||
- [ ] API endpoints responding
|
||||
- [ ] Blockchain syncing
|
||||
- [ ] Mining operations (if applicable)
|
||||
- [ ] Dashboard accessible
|
||||
- [ ] SSL certificates valid
|
||||
- [ ] Monitoring alerts cleared
|
||||
|
||||
### Post-Rollback Actions
|
||||
|
||||
1. **Root Cause Analysis**
|
||||
- Document what went wrong
|
||||
- Identify failure point
|
||||
- Create prevention plan
|
||||
|
||||
2. **Team Communication**
|
||||
- Update incident ticket
|
||||
- Share lessons learned
|
||||
- Update runbooks
|
||||
|
||||
3. **Preventive Measures**
|
||||
- Add additional tests
|
||||
- Improve monitoring
|
||||
- Update deployment checklist
|
||||
|
||||
### Contact Information
|
||||
|
||||
- **On-call Engineer**: [Phone/Slack]
|
||||
- **Engineering Lead**: [Phone/Slack]
|
||||
- **DevOps Team**: #devops-alerts
|
||||
- **Management**: #management-alerts
|
||||
|
||||
### Escalation
|
||||
|
||||
1. **Level 1**: On-call engineer (first 15 minutes)
|
||||
2. **Level 2**: Engineering lead (after 15 minutes)
|
||||
3. **Level 3**: CTO (after 30 minutes)
|
||||
|
||||
### Notes
|
||||
|
||||
- Always create a backup before rollback
|
||||
- Document every step during rollback
|
||||
- Test in staging before production if possible
|
||||
- Keep stakeholders informed throughout process
|
||||
Reference in New Issue
Block a user