chore: remove outdated documentation and reference files
Some checks failed
AITBC CI/CD Pipeline / lint-and-test (3.11) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.12) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.13) (push) Has been cancelled
AITBC CI/CD Pipeline / test-cli (push) Has been cancelled
AITBC CI/CD Pipeline / test-services (push) Has been cancelled
AITBC CI/CD Pipeline / test-production-services (push) Has been cancelled
AITBC CI/CD Pipeline / security-scan (push) Has been cancelled
AITBC CI/CD Pipeline / build (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-staging (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-production (push) Has been cancelled
AITBC CI/CD Pipeline / performance-test (push) Has been cancelled
AITBC CI/CD Pipeline / docs (push) Has been cancelled
AITBC CI/CD Pipeline / release (push) Has been cancelled
AITBC CI/CD Pipeline / notify (push) Has been cancelled
Security Scanning / Bandit Security Scan (apps/coordinator-api/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (cli/aitbc_cli) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-core/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-crypto/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-sdk/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (tests) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (javascript) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (python) (push) Has been cancelled
Security Scanning / Dependency Security Scan (push) Has been cancelled
Security Scanning / Container Security Scan (push) Has been cancelled
Security Scanning / OSSF Scorecard (push) Has been cancelled
Security Scanning / Security Summary Report (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.11) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.12) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.13) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-summary (push) Has been cancelled
Some checks failed
AITBC CI/CD Pipeline / lint-and-test (3.11) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.12) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.13) (push) Has been cancelled
AITBC CI/CD Pipeline / test-cli (push) Has been cancelled
AITBC CI/CD Pipeline / test-services (push) Has been cancelled
AITBC CI/CD Pipeline / test-production-services (push) Has been cancelled
AITBC CI/CD Pipeline / security-scan (push) Has been cancelled
AITBC CI/CD Pipeline / build (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-staging (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-production (push) Has been cancelled
AITBC CI/CD Pipeline / performance-test (push) Has been cancelled
AITBC CI/CD Pipeline / docs (push) Has been cancelled
AITBC CI/CD Pipeline / release (push) Has been cancelled
AITBC CI/CD Pipeline / notify (push) Has been cancelled
Security Scanning / Bandit Security Scan (apps/coordinator-api/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (cli/aitbc_cli) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-core/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-crypto/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-sdk/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (tests) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (javascript) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (python) (push) Has been cancelled
Security Scanning / Dependency Security Scan (push) Has been cancelled
Security Scanning / Container Security Scan (push) Has been cancelled
Security Scanning / OSSF Scorecard (push) Has been cancelled
Security Scanning / Security Summary Report (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.11) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.12) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-cli-level1 (3.13) (push) Has been cancelled
AITBC CLI Level 1 Commands Test / test-summary (push) Has been cancelled
- Remove debugging service documentation (DEBUgging_SERVICES.md) - Remove development logs policy and quick reference guides - Remove E2E test creation summary - Remove gift certificate example file - Remove GitHub pull summary documentation
This commit is contained in:
291
scripts/monitoring/production_monitoring.sh
Executable file
291
scripts/monitoring/production_monitoring.sh
Executable file
@@ -0,0 +1,291 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Production Monitoring Setup for AITBC Platform
|
||||
# Configures monitoring, alerting, and observability
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
log() { echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"; }
|
||||
success() { echo -e "${GREEN}✅ $1${NC}"; }
|
||||
warning() { echo -e "${YELLOW}⚠️ $1${NC}"; }
|
||||
|
||||
# Create monitoring directory
|
||||
MONITORING_DIR="/opt/aitbc/monitoring"
|
||||
mkdir -p "$MONITORING_DIR"
|
||||
|
||||
# Setup system metrics collection
|
||||
setup_system_metrics() {
|
||||
log "Setting up system metrics collection..."
|
||||
|
||||
# Create metrics collection script
|
||||
cat > "$MONITORING_DIR/collect_metrics.sh" << 'EOF'
|
||||
#!/bin/bash
|
||||
# System metrics collection for AITBC platform
|
||||
|
||||
METRICS_FILE="/opt/aitbc/monitoring/metrics.log"
|
||||
TIMESTAMP=$(date -Iseconds)
|
||||
|
||||
# System metrics
|
||||
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')
|
||||
MEM_USAGE=$(free | grep Mem | awk '{printf "%.1f", $3/$2 * 100.0}')
|
||||
DISK_USAGE=$(df -h / | awk 'NR==2{print $5}' | sed 's/%//')
|
||||
|
||||
# Service metrics
|
||||
COORDINATOR_STATUS=$(systemctl is-active aitbc-coordinator)
|
||||
BLOCKCHAIN_STATUS=$(systemctl is-active blockchain-node)
|
||||
|
||||
# API metrics
|
||||
API_RESPONSE_TIME=$(curl -o /dev/null -s -w '%{time_total}' https://aitbc.bubuit.net/api/v1/health 2>/dev/null || echo "0")
|
||||
API_STATUS=$(curl -o /dev/null -s -w '%{http_code}' https://aitbc.bubuit.net/api/v1/health 2>/dev/null || echo "000")
|
||||
|
||||
# Write metrics
|
||||
echo "$TIMESTAMP,cpu:$CPU_USAGE,memory:$MEM_USAGE,disk:$DISK_USAGE,coordinator:$COORDINATOR_STATUS,blockchain:$BLOCKCHAIN_STATUS,api_time:$API_RESPONSE_TIME,api_status:$API_STATUS" >> "$METRICS_FILE"
|
||||
|
||||
# Keep only last 1000 lines
|
||||
tail -n 1000 "$METRICS_FILE" > "$METRICS_FILE.tmp" && mv "$METRICS_FILE.tmp" "$METRICS_FILE"
|
||||
EOF
|
||||
|
||||
chmod +x "$MONITORING_DIR/collect_metrics.sh"
|
||||
|
||||
# Add to crontab (every 2 minutes)
|
||||
(crontab -l 2>/dev/null; echo "*/2 * * * * $MONITORING_DIR/collect_metrics.sh") | crontab -
|
||||
|
||||
success "System metrics collection configured"
|
||||
}
|
||||
|
||||
# Setup alerting system
|
||||
setup_alerting() {
|
||||
log "Setting up alerting system..."
|
||||
|
||||
# Create alerting script
|
||||
cat > "$MONITORING_DIR/check_alerts.sh" << 'EOF'
|
||||
#!/bin/bash
|
||||
# Alert checking for AITBC platform
|
||||
|
||||
ALERT_LOG="/opt/aitbc/monitoring/alerts.log"
|
||||
TIMESTAMP=$(date -Iseconds)
|
||||
ALERT_TRIGGERED=false
|
||||
|
||||
# Check service status
|
||||
check_service() {
|
||||
local service=$1
|
||||
local status=$(systemctl is-active "$service" 2>/dev/null || echo "failed")
|
||||
|
||||
if [[ "$status" != "active" ]]; then
|
||||
echo "$TIMESTAMP,SERVICE,$service is $status" >> "$ALERT_LOG"
|
||||
echo "🚨 ALERT: Service $service is $status"
|
||||
ALERT_TRIGGERED=true
|
||||
fi
|
||||
}
|
||||
|
||||
# Check API health
|
||||
check_api() {
|
||||
local response=$(curl -s -o /dev/null -w '%{http_code}' https://aitbc.bubuit.net/api/v1/health 2>/dev/null || echo "000")
|
||||
|
||||
if [[ "$response" != "200" ]]; then
|
||||
echo "$TIMESTAMP,API,Health endpoint returned $response" >> "$ALERT_LOG"
|
||||
echo "🚨 ALERT: API health check failed (HTTP $response)"
|
||||
ALERT_TRIGGERED=true
|
||||
fi
|
||||
}
|
||||
|
||||
# Check disk space
|
||||
check_disk() {
|
||||
local usage=$(df / | awk 'NR==2{print $5}' | sed 's/%//')
|
||||
|
||||
if [[ $usage -gt 80 ]]; then
|
||||
echo "$TIMESTAMP,DISK,Disk usage is ${usage}%" >> "$ALERT_LOG"
|
||||
echo "🚨 ALERT: Disk usage is ${usage}%"
|
||||
ALERT_TRIGGERED=true
|
||||
fi
|
||||
}
|
||||
|
||||
# Check memory usage
|
||||
check_memory() {
|
||||
local usage=$(free | grep Mem | awk '{printf "%.0f", $3/$2 * 100.0}')
|
||||
|
||||
if [[ $usage -gt 90 ]]; then
|
||||
echo "$TIMESTAMP,MEMORY,Memory usage is ${usage}%" >> "$ALERT_LOG"
|
||||
echo "🚨 ALERT: Memory usage is ${usage}%"
|
||||
ALERT_TRIGGERED=true
|
||||
fi
|
||||
}
|
||||
|
||||
# Run checks
|
||||
check_service "aitbc-coordinator"
|
||||
check_service "blockchain-node"
|
||||
check_api
|
||||
check_disk
|
||||
check_memory
|
||||
|
||||
# If no alerts, log all clear
|
||||
if [[ "$ALERT_TRIGGERED" == "false" ]]; then
|
||||
echo "$TIMESTAMP,ALL_CLEAR,All systems operational" >> "$ALERT_LOG"
|
||||
fi
|
||||
EOF
|
||||
|
||||
chmod +x "$MONITORING_DIR/check_alerts.sh"
|
||||
|
||||
# Add to crontab (every 5 minutes)
|
||||
(crontab -l 2>/dev/null; echo "*/5 * * * * $MONITORING_DIR/check_alerts.sh") | crontab -
|
||||
|
||||
success "Alerting system configured"
|
||||
}
|
||||
|
||||
# Setup performance dashboard
|
||||
setup_dashboard() {
|
||||
log "Setting up performance dashboard..."
|
||||
|
||||
# Create dashboard script
|
||||
cat > "$MONITORING_DIR/dashboard.sh" << 'EOF'
|
||||
#!/bin/bash
|
||||
# Performance dashboard for AITBC platform
|
||||
|
||||
clear
|
||||
echo "🔍 AITBC Platform Performance Dashboard"
|
||||
echo "========================================"
|
||||
echo "Last Updated: $(date)"
|
||||
echo ""
|
||||
|
||||
# System Status
|
||||
echo "📊 System Status:"
|
||||
echo "CPU: $(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')% used"
|
||||
echo "Memory: $(free -h | grep Mem | awk '{print $3"/"$2}')"
|
||||
echo "Disk: $(df -h / | awk 'NR==2{print $3"/"$2" ("$5")"}')"
|
||||
echo ""
|
||||
|
||||
# Service Status
|
||||
echo "🔧 Service Status:"
|
||||
systemctl is-active aitbc-coordinator && echo "✅ Coordinator API: Active" || echo "❌ Coordinator API: Inactive"
|
||||
systemctl is-active blockchain-node && echo "✅ Blockchain Node: Active" || echo "❌ Blockchain Node: Inactive"
|
||||
systemctl is-active nginx && echo "✅ Nginx: Active" || echo "❌ Nginx: Inactive"
|
||||
echo ""
|
||||
|
||||
# API Performance
|
||||
echo "🌐 API Performance:"
|
||||
API_TIME=$(curl -o /dev/null -s -w '%{time_total}' https://aitbc.bubuit.net/api/v1/health 2>/dev/null || echo "0.000")
|
||||
echo "Health Endpoint: ${API_TIME}s"
|
||||
echo ""
|
||||
|
||||
# Recent Alerts (last 10)
|
||||
echo "🚨 Recent Alerts:"
|
||||
if [[ -f /opt/aitbc/monitoring/alerts.log ]]; then
|
||||
tail -n 10 /opt/aitbc/monitoring/alerts.log | while IFS=',' read -r timestamp type message; do
|
||||
echo " $timestamp: $message"
|
||||
done
|
||||
else
|
||||
echo " No alerts logged"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Quick Stats
|
||||
echo "📈 Quick Stats:"
|
||||
if [[ -f /opt/aitbc/monitoring/metrics.log ]]; then
|
||||
echo " Metrics collected: $(wc -l < /opt/aitbc/monitoring/metrics.log) entries"
|
||||
echo " Alerts triggered: $(grep -c "ALERT" /opt/aitbc/monitoring/alerts.log 2>/dev/null || echo "0")"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Press Ctrl+C to exit, or refresh in 30 seconds..."
|
||||
sleep 30
|
||||
exec "$0"
|
||||
EOF
|
||||
|
||||
chmod +x "$MONITORING_DIR/dashboard.sh"
|
||||
|
||||
success "Performance dashboard created"
|
||||
}
|
||||
|
||||
# Setup log analysis
|
||||
setup_log_analysis() {
|
||||
log "Setting up log analysis..."
|
||||
|
||||
# Create log analysis script
|
||||
cat > "$MONITORING_DIR/analyze_logs.sh" << 'EOF'
|
||||
#!/bin/bash
|
||||
# Log analysis for AITBC platform
|
||||
|
||||
LOG_DIR="/var/log"
|
||||
ANALYSIS_FILE="/opt/aitbc/monitoring/log_analysis.txt"
|
||||
TIMESTAMP=$(date -Iseconds)
|
||||
|
||||
echo "=== Log Analysis - $TIMESTAMP ===" >> "$ANALYSIS_FILE"
|
||||
|
||||
# Analyze nginx logs
|
||||
if [[ -f "$LOG_DIR/nginx/access.log" ]]; then
|
||||
echo "" >> "$ANALYSIS_FILE"
|
||||
echo "NGINX Access Analysis:" >> "$ANALYSIS_FILE"
|
||||
|
||||
# Top 10 endpoints
|
||||
echo "Top 10 endpoints:" >> "$ANALYSIS_FILE"
|
||||
awk '{print $7}' "$LOG_DIR/nginx/access.log" | sort | uniq -c | sort -nr | head -10 >> "$ANALYSIS_FILE"
|
||||
|
||||
# HTTP status codes
|
||||
echo "" >> "$ANALYSIS_FILE"
|
||||
echo "HTTP Status Codes:" >> "$ANALYSIS_FILE"
|
||||
awk '{print $9}' "$LOG_DIR/nginx/access.log" | sort | uniq -c | sort -nr >> "$ANALYSIS_FILE"
|
||||
|
||||
# Error rate
|
||||
local total=$(wc -l < "$LOG_DIR/nginx/access.log")
|
||||
local errors=$(awk '$9 >= 400 {print}' "$LOG_DIR/nginx/access.log" | wc -l)
|
||||
local error_rate=$(echo "scale=2; $errors * 100 / $total" | bc)
|
||||
echo "" >> "$ANALYSIS_FILE"
|
||||
echo "Error Rate: ${error_rate}%" >> "$ANALYSIS_FILE"
|
||||
fi
|
||||
|
||||
# Analyze application logs
|
||||
if journalctl -u aitbc-coordinator --since "1 hour ago" | grep -q "ERROR"; then
|
||||
echo "" >> "$ANALYSIS_FILE"
|
||||
echo "Application Errors (last hour):" >> "$ANALYSIS_FILE"
|
||||
journalctl -u aitbc-coordinator --since "1 hour ago" | grep "ERROR" | tail -5 >> "$ANALYSIS_FILE"
|
||||
fi
|
||||
|
||||
echo "Analysis complete" >> "$ANALYSIS_FILE"
|
||||
EOF
|
||||
|
||||
chmod +x "$MONITORING_DIR/analyze_logs.sh"
|
||||
|
||||
# Add to crontab (hourly)
|
||||
(crontab -l 2>/dev/null; echo "0 * * * * $MONITORING_DIR/analyze_logs.sh") | crontab -
|
||||
|
||||
success "Log analysis configured"
|
||||
}
|
||||
|
||||
# Main execution
|
||||
main() {
|
||||
log "Setting up AITBC Production Monitoring..."
|
||||
|
||||
setup_system_metrics
|
||||
setup_alerting
|
||||
setup_dashboard
|
||||
setup_log_analysis
|
||||
|
||||
success "Production monitoring setup complete!"
|
||||
|
||||
echo
|
||||
echo "📊 MONITORING SUMMARY:"
|
||||
echo " ✅ System metrics collection (every 2 minutes)"
|
||||
echo " ✅ Alert checking (every 5 minutes)"
|
||||
echo " ✅ Performance dashboard"
|
||||
echo " ✅ Log analysis (hourly)"
|
||||
echo
|
||||
echo "🔧 MONITORING COMMANDS:"
|
||||
echo " Dashboard: $MONITORING_DIR/dashboard.sh"
|
||||
echo " Metrics: $MONITORING_DIR/collect_metrics.sh"
|
||||
echo " Alerts: $MONITORING_DIR/check_alerts.sh"
|
||||
echo " Log Analysis: $MONITORING_DIR/analyze_logs.sh"
|
||||
echo
|
||||
echo "📁 MONITORING FILES:"
|
||||
echo " Metrics: $MONITORING_DIR/metrics.log"
|
||||
echo " Alerts: $MONITORING_DIR/alerts.log"
|
||||
echo " Analysis: $MONITORING_DIR/log_analysis.txt"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user