Files
aitbc/apps/blockchain-node/src/aitbc_chain/network/peers.py
aitbc c876b0aa20 feat: implement AITBC mesh network deployment infrastructure
 Phase 0: Pre-implementation checklist completed
- Environment configurations (dev/staging/production)
- Directory structure setup (logs, backups, monitoring)
- Virtual environment with dependencies

 Master deployment script created
- Single command deployment with validation
- Progress tracking and rollback capability
- Health checks and deployment reporting

 Validation script created
- Module import validation
- Basic functionality testing
- Configuration and script verification

 Implementation fixes
- Fixed dataclass import in consensus keys
- Fixed async function syntax in tests
- Updated deployment script for virtual environment

🚀 Ready for deployment: ./scripts/deploy-mesh-network.sh dev
2026-04-02 12:08:15 +02:00

338 lines
13 KiB
Python

"""
Dynamic Peer Management
Handles peer join/leave operations and connection management
"""
import asyncio
import time
from typing import Dict, List, Optional, Set
from dataclasses import dataclass
from enum import Enum
from .discovery import PeerNode, NodeStatus, P2PDiscovery
from .health import PeerHealthMonitor, HealthStatus
class PeerAction(Enum):
JOIN = "join"
LEAVE = "leave"
DEMOTE = "demote"
PROMOTE = "promote"
BAN = "ban"
@dataclass
class PeerEvent:
action: PeerAction
node_id: str
timestamp: float
reason: str
metadata: Dict
class DynamicPeerManager:
"""Manages dynamic peer connections and lifecycle"""
def __init__(self, discovery: P2PDiscovery, health_monitor: PeerHealthMonitor):
self.discovery = discovery
self.health_monitor = health_monitor
self.peer_events: List[PeerEvent] = []
self.max_connections = 50
self.min_connections = 8
self.connection_retry_interval = 300 # 5 minutes
self.ban_threshold = 0.1 # Reputation below this gets banned
self.running = False
# Peer management policies
self.auto_reconnect = True
self.auto_ban_malicious = True
self.load_balance = True
async def start_management(self):
"""Start peer management service"""
self.running = True
log_info("Starting dynamic peer management")
while self.running:
try:
await self._manage_peer_connections()
await self._enforce_peer_policies()
await self._optimize_topology()
await asyncio.sleep(30) # Check every 30 seconds
except Exception as e:
log_error(f"Peer management error: {e}")
await asyncio.sleep(10)
async def stop_management(self):
"""Stop peer management service"""
self.running = False
log_info("Stopping dynamic peer management")
async def _manage_peer_connections(self):
"""Manage peer connections based on current state"""
current_peers = self.discovery.get_peer_count()
if current_peers < self.min_connections:
await self._discover_new_peers()
elif current_peers > self.max_connections:
await self._remove_excess_peers()
# Reconnect to disconnected peers
if self.auto_reconnect:
await self._reconnect_disconnected_peers()
async def _discover_new_peers(self):
"""Discover and connect to new peers"""
log_info(f"Peer count ({self.discovery.get_peer_count()}) below minimum ({self.min_connections}), discovering new peers")
# Request peer lists from existing connections
for peer in self.discovery.get_peer_list():
await self.discovery._request_peer_list(peer)
# Try to connect to bootstrap nodes
await self.discovery._connect_to_bootstrap_nodes()
async def _remove_excess_peers(self):
"""Remove excess peers based on quality metrics"""
log_info(f"Peer count ({self.discovery.get_peer_count()}) above maximum ({self.max_connections}), removing excess peers")
peers = self.discovery.get_peer_list()
# Sort peers by health score and reputation
sorted_peers = sorted(
peers,
key=lambda p: (
self.health_monitor.get_health_status(p.node_id).health_score if
self.health_monitor.get_health_status(p.node_id) else 0.0,
p.reputation
)
)
# Remove lowest quality peers
excess_count = len(peers) - self.max_connections
for i in range(excess_count):
peer_to_remove = sorted_peers[i]
await self._remove_peer(peer_to_remove.node_id, "Excess peer removed")
async def _reconnect_disconnected_peers(self):
"""Reconnect to peers that went offline"""
# Get recently disconnected peers
all_health = self.health_monitor.get_all_health_status()
for node_id, health in all_health.items():
if (health.status == NodeStatus.OFFLINE and
time.time() - health.last_check < self.connection_retry_interval):
# Try to reconnect
peer = self.discovery.peers.get(node_id)
if peer:
success = await self.discovery._connect_to_peer(peer.address, peer.port)
if success:
log_info(f"Reconnected to peer {node_id}")
async def _enforce_peer_policies(self):
"""Enforce peer management policies"""
if self.auto_ban_malicious:
await self._ban_malicious_peers()
await self._update_peer_reputations()
async def _ban_malicious_peers(self):
"""Ban peers with malicious behavior"""
for peer in self.discovery.get_peer_list():
if peer.reputation < self.ban_threshold:
await self._ban_peer(peer.node_id, "Reputation below threshold")
async def _update_peer_reputations(self):
"""Update peer reputations based on health metrics"""
for peer in self.discovery.get_peer_list():
health = self.health_monitor.get_health_status(peer.node_id)
if health:
# Update reputation based on health score
reputation_delta = (health.health_score - 0.5) * 0.1 # Small adjustments
self.discovery.update_peer_reputation(peer.node_id, reputation_delta)
async def _optimize_topology(self):
"""Optimize network topology for better performance"""
if not self.load_balance:
return
peers = self.discovery.get_peer_list()
healthy_peers = self.health_monitor.get_healthy_peers()
# Prioritize connections to healthy peers
for peer in peers:
if peer.node_id not in healthy_peers:
# Consider replacing unhealthy peer
await self._consider_peer_replacement(peer)
async def _consider_peer_replacement(self, unhealthy_peer: PeerNode):
"""Consider replacing unhealthy peer with better alternative"""
# This would implement logic to find and connect to better peers
# For now, just log the consideration
log_info(f"Considering replacement for unhealthy peer {unhealthy_peer.node_id}")
async def add_peer(self, address: str, port: int, public_key: str = "") -> bool:
"""Manually add a new peer"""
try:
success = await self.discovery._connect_to_peer(address, port)
if success:
# Record peer join event
self._record_peer_event(PeerAction.JOIN, f"{address}:{port}", "Manual peer addition")
log_info(f"Successfully added peer {address}:{port}")
return True
else:
log_warn(f"Failed to add peer {address}:{port}")
return False
except Exception as e:
log_error(f"Error adding peer {address}:{port}: {e}")
return False
async def remove_peer(self, node_id: str, reason: str = "Manual removal") -> bool:
"""Manually remove a peer"""
return await self._remove_peer(node_id, reason)
async def _remove_peer(self, node_id: str, reason: str) -> bool:
"""Remove peer from network"""
try:
if node_id in self.discovery.peers:
peer = self.discovery.peers[node_id]
# Close connection if open
# This would be implemented with actual connection management
# Remove from discovery
del self.discovery.peers[node_id]
# Remove from health monitoring
if node_id in self.health_monitor.health_status:
del self.health_monitor.health_status[node_id]
# Record peer leave event
self._record_peer_event(PeerAction.LEAVE, node_id, reason)
log_info(f"Removed peer {node_id}: {reason}")
return True
else:
log_warn(f"Peer {node_id} not found for removal")
return False
except Exception as e:
log_error(f"Error removing peer {node_id}: {e}")
return False
async def ban_peer(self, node_id: str, reason: str = "Banned by administrator") -> bool:
"""Ban a peer from the network"""
return await self._ban_peer(node_id, reason)
async def _ban_peer(self, node_id: str, reason: str) -> bool:
"""Ban peer and prevent reconnection"""
success = await self._remove_peer(node_id, f"BANNED: {reason}")
if success:
# Record ban event
self._record_peer_event(PeerAction.BAN, node_id, reason)
# Add to ban list (would be persistent in real implementation)
log_info(f"Banned peer {node_id}: {reason}")
return success
async def promote_peer(self, node_id: str) -> bool:
"""Promote peer to higher priority"""
try:
if node_id in self.discovery.peers:
peer = self.discovery.peers[node_id]
# Increase reputation
self.discovery.update_peer_reputation(node_id, 0.1)
# Record promotion event
self._record_peer_event(PeerAction.PROMOTE, node_id, "Peer promoted")
log_info(f"Promoted peer {node_id}")
return True
else:
log_warn(f"Peer {node_id} not found for promotion")
return False
except Exception as e:
log_error(f"Error promoting peer {node_id}: {e}")
return False
async def demote_peer(self, node_id: str) -> bool:
"""Demote peer to lower priority"""
try:
if node_id in self.discovery.peers:
peer = self.discovery.peers[node_id]
# Decrease reputation
self.discovery.update_peer_reputation(node_id, -0.1)
# Record demotion event
self._record_peer_event(PeerAction.DEMOTE, node_id, "Peer demoted")
log_info(f"Demoted peer {node_id}")
return True
else:
log_warn(f"Peer {node_id} not found for demotion")
return False
except Exception as e:
log_error(f"Error demoting peer {node_id}: {e}")
return False
def _record_peer_event(self, action: PeerAction, node_id: str, reason: str, metadata: Dict = None):
"""Record peer management event"""
event = PeerEvent(
action=action,
node_id=node_id,
timestamp=time.time(),
reason=reason,
metadata=metadata or {}
)
self.peer_events.append(event)
# Limit event history size
if len(self.peer_events) > 1000:
self.peer_events = self.peer_events[-500:] # Keep last 500 events
def get_peer_events(self, node_id: Optional[str] = None, limit: int = 100) -> List[PeerEvent]:
"""Get peer management events"""
events = self.peer_events
if node_id:
events = [e for e in events if e.node_id == node_id]
return events[-limit:]
def get_peer_statistics(self) -> Dict:
"""Get peer management statistics"""
peers = self.discovery.get_peer_list()
health_status = self.health_monitor.get_all_health_status()
stats = {
"total_peers": len(peers),
"healthy_peers": len(self.health_monitor.get_healthy_peers()),
"unhealthy_peers": len(self.health_monitor.get_unhealthy_peers()),
"average_reputation": sum(p.reputation for p in peers) / len(peers) if peers else 0,
"average_health_score": sum(h.health_score for h in health_status.values()) / len(health_status) if health_status else 0,
"recent_events": len([e for e in self.peer_events if time.time() - e.timestamp < 3600]) # Last hour
}
return stats
# Global peer manager
peer_manager: Optional[DynamicPeerManager] = None
def get_peer_manager() -> Optional[DynamicPeerManager]:
"""Get global peer manager"""
return peer_manager
def create_peer_manager(discovery: P2PDiscovery, health_monitor: PeerHealthMonitor) -> DynamicPeerManager:
"""Create and set global peer manager"""
global peer_manager
peer_manager = DynamicPeerManager(discovery, health_monitor)
return peer_manager