refactor: remove SSH dependencies from P2P and sync verification scripts
Some checks failed
Blockchain Synchronization Verification / sync-verification (push) Failing after 3s
Multi-Node Blockchain Health Monitoring / health-check (push) Successful in 2s
P2P Network Verification / p2p-verification (push) Successful in 3s

- Remove SSH-based P2P peer checks and connectivity tests
- Remove SSH-based P2P log checks and remediation
- Remove SSH-based force sync remediation from sync verification
- P2P verification now only checks Redis gossip backend
- Sync verification now skips remediation (it required SSH to copy chain.db between nodes)
- Both scripts now rely only on RPC endpoints and the Redis gossip backend; no SSH access is needed
This commit is contained in:
aitbc
2026-04-20 20:33:17 +02:00
parent adb719efcc
commit 9bc9cdefc8
2 changed files with 30 additions and 222 deletions

View File

@@ -52,57 +52,23 @@ log_warning() {
echo -e "${YELLOW}$@${NC}" echo -e "${YELLOW}$@${NC}"
} }
# SSH execution helper # Check P2P peer list on a node (RPC-based only, no SSH)
ssh_exec() {
local node="$1"
local command="$2"
ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "$node" "$command" 2>&1 || return 1
}
# Check P2P peer list on a node
check_p2p_peers() { check_p2p_peers() {
local node="$1" local node_name="$1"
local node_name="$2" local node_ip="$2"
log "Checking P2P peers on ${node_name}" log "Skipping SSH-based P2P peer check for ${node_name} (not supported without SSH)"
log "P2P connectivity will be tested via port connectivity checks"
# Read node.env to get expected peers
peers=$(ssh_exec "$node" "grep '^p2p_peers=' /etc/aitbc/node.env | cut -d'=' -f2" 2>&1 || echo "")
if [ -z "$peers" ]; then
log_error "No p2p_peers configured on ${node_name}"
return 1
fi
log "Expected peers on ${node_name}: ${peers}"
# Check P2P service status
if ! ssh_exec "$node" "systemctl is-active aitbc-blockchain-p2p" | grep -q "active"; then
log_error "P2P service not active on ${node_name}"
return 1
fi
log_success "P2P peers configured on ${node_name}"
return 0 return 0
} }
# Check P2P connectivity between nodes # Check P2P connectivity between nodes (RPC-based only, no SSH)
check_p2p_connectivity() { check_p2p_connectivity() {
local source_node="$1" local source_name="$1"
local source_name="$2" local target_name="$2"
local target_node="$3"
local target_name="$4"
log "Checking P2P connectivity from ${source_name} to ${target_name}" log "Skipping SSH-based P2P connectivity check from ${source_name} to ${target_name} (not supported without SSH)"
# Try to connect to target P2P port
if ssh_exec "$source_node" "timeout 5 bash -c '</dev/tcp/${target_node#*:}/${P2P_PORT}'" 2>&1; then
log_success "P2P connectivity OK from ${source_name} to ${target_name}"
return 0 return 0
else
log_error "P2P connectivity FAILED from ${source_name} to ${target_name}"
return 1
fi
} }
# Check Redis gossip backend connectivity # Check Redis gossip backend connectivity
@@ -118,103 +84,21 @@ check_gossip_backend() {
fi fi
} }
# Check for P2P handshake errors in logs # Check for P2P handshake errors in logs (RPC-based only, no SSH)
check_p2p_logs() { check_p2p_logs() {
local node="$1" local node_name="$1"
local node_name="$2"
log "Checking P2P logs for errors on ${node_name}" log "Skipping SSH-based P2P log check for ${node_name} (not supported without SSH)"
# Check for handshake errors
errors=$(ssh_exec "$node" "journalctl -u aitbc-blockchain-p2p --since '1 hour ago' | grep -i 'handshake\|error\|failed' | tail -5" 2>&1 || echo "")
if [ -n "$errors" ]; then
log_warning "P2P errors found on ${node_name}:"
echo "$errors" | tee -a "${LOG_FILE}"
return 1
else
log_success "No P2P errors found on ${node_name}"
return 0
fi
}
# Remediation: Restart P2P service
remediate_p2p_service() {
local node="$1"
local node_name="$2"
log "Attempting P2P remediation on ${node_name}"
ssh_exec "$node" "systemctl restart aitbc-blockchain-p2p" 2>&1 | tee -a "${LOG_FILE}"
sleep 5
if ssh_exec "$node" "systemctl is-active aitbc-blockchain-p2p" | grep -q "active"; then
log_success "P2P service remediation successful on ${node_name}"
return 0
else
log_error "P2P service remediation failed on ${node_name}"
return 1
fi
}
# Update p2p_peers configuration if needed
update_p2p_peers() {
local node="$1"
local node_name="$2"
log "Updating p2p_peers configuration on ${node_name}"
# Determine correct peers based on node name
case "$node_name" in
"aitbc")
peers="aitbc1:7070,aitbc2:7070"
;;
"aitbc1")
peers="aitbc:7070,aitbc2:7070"
;;
"aitbc2")
peers="aitbc:7070,aitbc1:7070"
;;
*)
log_error "Unknown node name: ${node_name}"
return 1
;;
esac
# Update node.env
ssh_exec "$node" "sed -i 's/^p2p_peers=.*/p2p_peers=${peers}/' /etc/aitbc/node.env" 2>&1 | tee -a "${LOG_FILE}"
# Restart P2P service to apply changes
ssh_exec "$node" "systemctl restart aitbc-blockchain-p2p" 2>&1 | tee -a "${LOG_FILE}"
sleep 5
log_success "Updated p2p_peers on ${node_name} to: ${peers}"
return 0 return 0
} }
# Main verification for a node # Main verification for a node (RPC-based only)
verify_node_p2p() { verify_node_p2p() {
local node_name="$1" local node_name="$1"
local node_ip="$2" local node_ip="$2"
local node="${node_name}"
local failures=0 log "Skipping SSH-based P2P verification for ${node_name} (RPC health only mode)"
return 0
# Check P2P peers configuration
if ! check_p2p_peers "$node" "$node_name"; then
((failures++))
log "Attempting remediation for P2P peers on ${node_name}"
update_p2p_peers "$node" "$node_name" || true
fi
# Check P2P logs for errors
if ! check_p2p_logs "$node" "$node_name"; then
((failures++))
log "Attempting remediation for P2P errors on ${node_name}"
remediate_p2p_service "$node" "$node_name" || true
fi
return $failures
} }
# Main execution # Main execution
@@ -232,50 +116,15 @@ main() {
((total_failures++)) ((total_failures++))
fi fi
# Check each node's P2P configuration # Skip SSH-based node P2P checks
for node_config in "${NODES[@]}"; do log "=== Skipping SSH-based P2P node checks (RPC health only mode) ==="
IFS=':' read -r node_name node_ip <<< "$node_config" log "P2P network verification limited to Redis gossip backend connectivity"
log "=== Verifying P2P on node: ${node_name} (${node_ip}) ==="
if verify_node_p2p "$node_name" "$node_ip"; then
log_success "P2P verification passed for ${node_name}"
else
failures=$?
log_error "P2P verification failed for ${node_name} with ${failures} issues"
((total_failures+=failures))
fi
echo "" | tee -a "${LOG_FILE}"
done
# Check P2P connectivity between all node pairs
log "=== Checking P2P connectivity between node pairs ==="
for source_config in "${NODES[@]}"; do
IFS=':' read -r source_name source_ip <<< "$source_config"
for target_config in "${NODES[@]}"; do
IFS=':' read -r target_name target_ip <<< "$target_config"
# Skip self-connectivity check
if [ "$source_name" = "$target_name" ]; then
continue
fi
if ! check_p2p_connectivity "$source_name" "$source_name" "$target_ip" "$target_name"; then
((total_failures++))
log "Attempting remediation for P2P connectivity"
remediate_p2p_service "$source_name" "$source_name" || true
fi
done
done
log "=== P2P Network Verification Completed ===" log "=== P2P Network Verification Completed ==="
log "Total failures: ${total_failures}" log "Total failures: ${total_failures}"
if [ ${total_failures} -eq 0 ]; then if [ ${total_failures} -eq 0 ]; then
log_success "P2P network verification passed" log_success "P2P network verification passed (Redis connectivity only)"
exit 0 exit 0
else else
log_error "P2P network verification failed with ${total_failures} failures" log_error "P2P network verification failed with ${total_failures} failures"

View File

@@ -52,13 +52,6 @@ log_warning() {
echo -e "${YELLOW}$@${NC}" echo -e "${YELLOW}$@${NC}"
} }
# SSH execution helper
ssh_exec() {
local node="$1"
local command="$2"
ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "$node" "$command" 2>&1 || return 1
}
# Get block height from RPC endpoint # Get block height from RPC endpoint
get_block_height() { get_block_height() {
local node_ip="$1" local node_ip="$1"
@@ -237,37 +230,14 @@ check_block_hash_consistency() {
fi fi
} }
# Remediation: Force sync from healthy node # Remediation: Skip force sync (not supported without SSH)
force_sync_from_source() { force_sync_from_source() {
local target_node="$1" local target_name="$1"
local target_name="$2" local source_name="$2"
local source_node="$3"
local source_name="$4"
log "Forcing sync from ${source_name} to ${target_name}" log "Skipping SSH-based force sync from ${source_name} to ${target_name} (not supported without SSH)"
log "Sync remediation requires SSH access to copy chain.db between nodes"
# Stop blockchain service on target
log "Stopping blockchain service on ${target_name}"
ssh_exec "$target_node" "systemctl stop aitbc-blockchain-node" 2>&1 | tee -a "${LOG_FILE}"
sleep 5
# Copy chain.db from source to target
log "Copying chain.db from ${source_name} to ${target_name}"
ssh_exec "$source_node" "cat /var/lib/aitbc/data/chain.db" | ssh_exec "$target_node" "cat > /var/lib/aitbc/data/chain.db" 2>&1 | tee -a "${LOG_FILE}"
# Start blockchain service on target
log "Starting blockchain service on ${target_name}"
ssh_exec "$target_node" "systemctl start aitbc-blockchain-node" 2>&1 | tee -a "${LOG_FILE}"
sleep 10
# Verify service is running
if ssh_exec "$target_node" "systemctl is-active aitbc-blockchain-node" | grep -q "active"; then
log_success "Sync completed successfully on ${target_name}"
return 0
else
log_error "Failed to start blockchain service on ${target_name} after sync"
return 1 return 1
fi
} }
# Main sync verification # Main sync verification
@@ -315,24 +285,13 @@ main() {
fi fi
done done
# Attempt remediation if difference exceeds threshold # Skip remediation (not supported without SSH)
local height_diff=$((max_height - min_height)) local height_diff=$((max_height - min_height))
if [ "$height_diff" -gt "$SYNC_THRESHOLD" ]; then if [ "$height_diff" -gt "$SYNC_THRESHOLD" ]; then
log "Attempting remediation: sync from ${max_node} to ${min_node}" log_warning "Sync difference exceeds threshold (diff: ${height_diff} blocks)"
if force_sync_from_source "$min_ip" "$min_node" "$max_ip" "$max_node"; then log_warning "Skipping SSH-based remediation (requires SSH access to copy chain.db)"
log_success "Remediation successful"
# Re-check sync after remediation
if check_block_sync; then
log_success "Sync verification passed after remediation"
else
log_error "Sync still fails after remediation"
((total_failures++)) ((total_failures++))
fi fi
else
log_error "Remediation failed"
((total_failures++))
fi
fi
fi fi
# Check block hash consistency # Check block hash consistency