aitbc/scripts/plan/02_network_infrastructure.sh

#!/bin/bash

# Phase 2: Network Infrastructure Setup Script
# Implements P2P discovery, dynamic peer management, and mesh routing

set -e

echo "=== PHASE 2: NETWORK INFRASTRUCTURE SETUP ==="

# Configuration
NETWORK_DIR="/opt/aitbc/apps/blockchain-node/src/aitbc_chain/network"
TEST_NODES=("node1" "node2" "node3" "node4" "node5")
BOOTSTRAP_NODES=("10.1.223.93:8000" "10.1.223.40:8000")

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

log_info() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

log_debug() {
    echo -e "${BLUE}[DEBUG]${NC} $1"
}

# Function to backup existing network files
backup_network() {
    log_info "Backing up existing network files..."
    if [ -d "$NETWORK_DIR" ]; then
        cp -r "$NETWORK_DIR" "${NETWORK_DIR}_backup_$(date +%Y%m%d_%H%M%S)"
        log_info "Backup completed"
    fi
}

# Function to create P2P discovery service
create_p2p_discovery() {
    log_info "Creating P2P discovery service..."

    cat > "$NETWORK_DIR/discovery.py" << 'EOF'
"""
P2P Node Discovery Service
Handles bootstrap nodes and peer discovery for mesh network
"""

import asyncio
import json
import time
import hashlib
from typing import List, Dict, Optional, Set, Tuple
from dataclasses import dataclass, asdict
from enum import Enum
import socket
import struct

class NodeStatus(Enum):
    ONLINE = "online"
    OFFLINE = "offline"
    CONNECTING = "connecting"
    ERROR = "error"

@dataclass
class PeerNode:
    node_id: str
    address: str
    port: int
    public_key: str
    last_seen: float
    status: NodeStatus
    capabilities: List[str]
    reputation: float
    connection_count: int

@dataclass
class DiscoveryMessage:
    message_type: str
    node_id: str
    address: str
    port: int
    timestamp: float
    signature: str

class P2PDiscovery:
    """P2P node discovery and management service"""

    def __init__(self, local_node_id: str, local_address: str, local_port: int):
        self.local_node_id = local_node_id
        self.local_address = local_address
        self.local_port = local_port
        self.peers: Dict[str, PeerNode] = {}
        self.bootstrap_nodes: List[Tuple[str, int]] = []
        self.discovery_interval = 30  # seconds
        self.peer_timeout = 300  # 5 minutes
        self.max_peers = 50
        self.running = False

    def add_bootstrap_node(self, address: str, port: int):
        """Add bootstrap node for initial connection"""
        self.bootstrap_nodes.append((address, port))

    def generate_node_id(self, address: str, port: int, public_key: str) -> str:
        """Generate unique node ID from address, port, and public key"""
        content = f"{address}:{port}:{public_key}"
        return hashlib.sha256(content.encode()).hexdigest()

    async def start_discovery(self):
        """Start the discovery service"""
        self.running = True
        log_info(f"Starting P2P discovery for node {self.local_node_id}")

        # Start discovery tasks
        tasks = [
            asyncio.create_task(self._discovery_loop()),
            asyncio.create_task(self._peer_health_check()),
            asyncio.create_task(self._listen_for_discovery())
        ]

        try:
            await asyncio.gather(*tasks)
        except Exception as e:
            log_error(f"Discovery service error: {e}")
        finally:
            self.running = False

    async def stop_discovery(self):
        """Stop the discovery service"""
        self.running = False
        log_info("Stopping P2P discovery service")

    async def _discovery_loop(self):
        """Main discovery loop"""
        while self.running:
            try:
                # Connect to bootstrap nodes if no peers
                if len(self.peers) == 0:
                    await self._connect_to_bootstrap_nodes()

                # Discover new peers
                await self._discover_peers()

                # Wait before next discovery cycle
                await asyncio.sleep(self.discovery_interval)

            except Exception as e:
                log_error(f"Discovery loop error: {e}")
                await asyncio.sleep(5)

    async def _connect_to_bootstrap_nodes(self):
        """Connect to bootstrap nodes"""
        for address, port in self.bootstrap_nodes:
            if (address, port) != (self.local_address, self.local_port):
                await self._connect_to_peer(address, port)

    async def _connect_to_peer(self, address: str, port: int) -> bool:
        """Connect to a specific peer"""
        try:
            # Create discovery message
            message = DiscoveryMessage(
                message_type="hello",
                node_id=self.local_node_id,
                address=self.local_address,
                port=self.local_port,
                timestamp=time.time(),
                signature=""  # Would be signed in real implementation
            )

            # Send discovery message
            success = await self._send_discovery_message(address, port, message)

            if success:
                log_info(f"Connected to peer {address}:{port}")
                return True
            else:
                log_warn(f"Failed to connect to peer {address}:{port}")
                return False

        except Exception as e:
            log_error(f"Error connecting to peer {address}:{port}: {e}")
            return False

    async def _send_discovery_message(self, address: str, port: int, message: DiscoveryMessage) -> bool:
        """Send discovery message to peer"""
        try:
            reader, writer = await asyncio.open_connection(address, port)

            # Send message
            message_data = json.dumps(asdict(message)).encode()
            writer.write(message_data)
            await writer.drain()

            # Wait for response
            response_data = await reader.read(4096)
            response = json.loads(response_data.decode())

            writer.close()
            await writer.wait_closed()

            # Process response
            if response.get("message_type") == "hello_response":
                await self._handle_hello_response(response)
                return True

            return False

        except Exception as e:
            log_debug(f"Failed to send discovery message to {address}:{port}: {e}")
            return False

    async def _handle_hello_response(self, response: Dict):
        """Handle hello response from peer"""
        try:
            peer_node_id = response["node_id"]
            peer_address = response["address"]
            peer_port = response["port"]
            peer_capabilities = response.get("capabilities", [])

            # Create peer node
            peer = PeerNode(
                node_id=peer_node_id,
                address=peer_address,
                port=peer_port,
                public_key=response.get("public_key", ""),
                last_seen=time.time(),
                status=NodeStatus.ONLINE,
                capabilities=peer_capabilities,
                reputation=1.0,
                connection_count=0
            )

            # Add to peers
            self.peers[peer_node_id] = peer

            log_info(f"Added peer {peer_node_id} from {peer_address}:{peer_port}")

        except Exception as e:
            log_error(f"Error handling hello response: {e}")

    async def _discover_peers(self):
        """Discover new peers from existing connections"""
        for peer in list(self.peers.values()):
            if peer.status == NodeStatus.ONLINE:
                await self._request_peer_list(peer)

    async def _request_peer_list(self, peer: PeerNode):
        """Request peer list from connected peer"""
        try:
            message = DiscoveryMessage(
                message_type="get_peers",
                node_id=self.local_node_id,
                address=self.local_address,
                port=self.local_port,
                timestamp=time.time(),
                signature=""
            )

            success = await self._send_discovery_message(peer.address, peer.port, message)

            if success:
                log_debug(f"Requested peer list from {peer.node_id}")

        except Exception as e:
            log_error(f"Error requesting peer list from {peer.node_id}: {e}")

    async def _peer_health_check(self):
        """Check health of connected peers"""
        while self.running:
            try:
                current_time = time.time()

                # Check for offline peers
                for peer_id, peer in list(self.peers.items()):
                    if current_time - peer.last_seen > self.peer_timeout:
                        peer.status = NodeStatus.OFFLINE
                        log_warn(f"Peer {peer_id} went offline")

                # Remove offline peers
                self.peers = {
                    peer_id: peer for peer_id, peer in self.peers.items()
                    if peer.status != NodeStatus.OFFLINE or current_time - peer.last_seen < self.peer_timeout * 2
                }

                # Limit peer count
                if len(self.peers) > self.max_peers:
                    # Remove peers with lowest reputation
                    sorted_peers = sorted(
                        self.peers.items(),
                        key=lambda x: x[1].reputation
                    )

                    for peer_id, _ in sorted_peers[:len(self.peers) - self.max_peers]:
                        del self.peers[peer_id]
                        log_info(f"Removed peer {peer_id} due to peer limit")

                await asyncio.sleep(60)  # Check every minute

            except Exception as e:
                log_error(f"Peer health check error: {e}")
                await asyncio.sleep(30)

    async def _listen_for_discovery(self):
        """Listen for incoming discovery messages"""
        server = await asyncio.start_server(
            self._handle_discovery_connection,
            self.local_address,
            self.local_port
        )

        log_info(f"Discovery server listening on {self.local_address}:{self.local_port}")

        async with server:
            await server.serve_forever()

    async def _handle_discovery_connection(self, reader, writer):
        """Handle incoming discovery connection"""
        try:
            # Read message
            data = await reader.read(4096)
            message = json.loads(data.decode())

            # Process message
            response = await self._process_discovery_message(message)

            # Send response
            response_data = json.dumps(response).encode()
            writer.write(response_data)
            await writer.drain()

            writer.close()
            await writer.wait_closed()

        except Exception as e:
            log_error(f"Error handling discovery connection: {e}")

    async def _process_discovery_message(self, message: Dict) -> Dict:
        """Process incoming discovery message"""
        message_type = message.get("message_type")
        node_id = message.get("node_id")

        if message_type == "hello":
            # Respond with peer information
            return {
                "message_type": "hello_response",
                "node_id": self.local_node_id,
                "address": self.local_address,
                "port": self.local_port,
                "public_key": "",  # Would include actual public key
                "capabilities": ["consensus", "mempool", "rpc"],
                "timestamp": time.time()
            }

        elif message_type == "get_peers":
            # Return list of known peers
            peer_list = []
            for peer in self.peers.values():
                if peer.status == NodeStatus.ONLINE:
                    peer_list.append({
                        "node_id": peer.node_id,
                        "address": peer.address,
                        "port": peer.port,
                        "capabilities": peer.capabilities,
                        "reputation": peer.reputation
                    })

            return {
                "message_type": "peers_response",
                "node_id": self.local_node_id,
                "peers": peer_list,
                "timestamp": time.time()
            }

        else:
            return {
                "message_type": "error",
                "error": "Unknown message type",
                "timestamp": time.time()
            }

    def get_peer_count(self) -> int:
        """Get number of connected peers"""
        return len([p for p in self.peers.values() if p.status == NodeStatus.ONLINE])

    def get_peer_list(self) -> List[PeerNode]:
        """Get list of connected peers"""
        return [p for p in self.peers.values() if p.status == NodeStatus.ONLINE]

    def update_peer_reputation(self, node_id: str, delta: float) -> bool:
        """Update peer reputation"""
        if node_id not in self.peers:
            return False

        peer = self.peers[node_id]
        peer.reputation = max(0.0, min(1.0, peer.reputation + delta))
        return True

# Global discovery instance
discovery_instance: Optional[P2PDiscovery] = None

def get_discovery() -> Optional[P2PDiscovery]:
    """Get global discovery instance"""
    return discovery_instance

def create_discovery(node_id: str, address: str, port: int) -> P2PDiscovery:
    """Create and set global discovery instance"""
    global discovery_instance
    discovery_instance = P2PDiscovery(node_id, address, port)
    return discovery_instance
EOF

    log_info "P2P discovery service created"
}

# Function to create peer health monitoring
create_peer_health_monitoring() {
    log_info "Creating peer health monitoring..."

    cat > "$NETWORK_DIR/health.py" << 'EOF'
"""
Peer Health Monitoring Service
Monitors peer liveness and performance metrics
"""

import asyncio
import time
import ping3
import statistics
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
from enum import Enum

from .discovery import PeerNode, NodeStatus

class HealthMetric(Enum):
    LATENCY = "latency"
    AVAILABILITY = "availability"
    THROUGHPUT = "throughput"
    ERROR_RATE = "error_rate"

@dataclass
class HealthStatus:
    node_id: str
    status: NodeStatus
    last_check: float
    latency_ms: float
    availability_percent: float
    throughput_mbps: float
    error_rate_percent: float
    consecutive_failures: int
    health_score: float

class PeerHealthMonitor:
    """Monitors health and performance of peer nodes"""

    def __init__(self, check_interval: int = 60):
        self.check_interval = check_interval
        self.health_status: Dict[str, HealthStatus] = {}
        self.running = False
        self.latency_history: Dict[str, List[float]] = {}
        self.max_history_size = 100

        # Health thresholds
        self.max_latency_ms = 1000
        self.min_availability_percent = 90.0
        self.min_health_score = 0.5
        self.max_consecutive_failures = 3

    async def start_monitoring(self, peers: Dict[str, PeerNode]):
        """Start health monitoring for peers"""
        self.running = True
        log_info("Starting peer health monitoring")

        while self.running:
            try:
                await self._check_all_peers(peers)
                await asyncio.sleep(self.check_interval)
            except Exception as e:
                log_error(f"Health monitoring error: {e}")
                await asyncio.sleep(10)

    async def stop_monitoring(self):
        """Stop health monitoring"""
        self.running = False
        log_info("Stopping peer health monitoring")

    async def _check_all_peers(self, peers: Dict[str, PeerNode]):
        """Check health of all peers"""
        tasks = []

        for node_id, peer in peers.items():
            if peer.status == NodeStatus.ONLINE:
                task = asyncio.create_task(self._check_peer_health(peer))
                tasks.append(task)

        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)

    async def _check_peer_health(self, peer: PeerNode):
        """Check health of individual peer"""
        start_time = time.time()

        try:
            # Check latency
            latency = await self._measure_latency(peer.address, peer.port)

            # Check availability
            availability = await self._check_availability(peer)

            # Check throughput
            throughput = await self._measure_throughput(peer)

            # Calculate health score
            health_score = self._calculate_health_score(latency, availability, throughput)

            # Update health status
            self._update_health_status(peer, NodeStatus.ONLINE, latency, availability, throughput, 0.0, health_score)

            # Reset consecutive failures
            if peer.node_id in self.health_status:
                self.health_status[peer.node_id].consecutive_failures = 0

        except Exception as e:
            log_error(f"Health check failed for peer {peer.node_id}: {e}")

            # Handle failure
            consecutive_failures = self.health_status.get(peer.node_id, HealthStatus(peer.node_id, NodeStatus.OFFLINE, 0, 0, 0, 0, 0, 0, 0.0)).consecutive_failures + 1

            if consecutive_failures >= self.max_consecutive_failures:
                self._update_health_status(peer, NodeStatus.OFFLINE, 0, 0, 0, 100.0, 0.0)
            else:
                self._update_health_status(peer, NodeStatus.ERROR, 0, 0, 0, 0.0, consecutive_failures, 0.0)

    async def _measure_latency(self, address: str, port: int) -> float:
        """Measure network latency to peer"""
        try:
            # Use ping3 for basic latency measurement
            latency = ping3.ping(address, timeout=2)

            if latency is not None:
                latency_ms = latency * 1000

                # Update latency history
                node_id = f"{address}:{port}"
                if node_id not in self.latency_history:
                    self.latency_history[node_id] = []

                self.latency_history[node_id].append(latency_ms)

                # Limit history size
                if len(self.latency_history[node_id]) > self.max_history_size:
                    self.latency_history[node_id].pop(0)

                return latency_ms
            else:
                return float('inf')

        except Exception as e:
            log_debug(f"Latency measurement failed for {address}:{port}: {e}")
            return float('inf')

    async def _check_availability(self, peer: PeerNode) -> float:
        """Check peer availability by attempting connection"""
        try:
            start_time = time.time()

            # Try to connect to peer
            reader, writer = await asyncio.wait_for(
                asyncio.open_connection(peer.address, peer.port),
                timeout=5.0
            )

            connection_time = (time.time() - start_time) * 1000

            writer.close()
            await writer.wait_closed()

            # Calculate availability based on recent history
            node_id = peer.node_id
            if node_id in self.health_status:
                # Simple availability calculation based on success rate
                recent_status = self.health_status[node_id]
                if recent_status.status == NodeStatus.ONLINE:
                    return min(100.0, recent_status.availability_percent + 5.0)
                else:
                    return max(0.0, recent_status.availability_percent - 10.0)
            else:
                return 100.0  # First successful connection

        except Exception as e:
            log_debug(f"Availability check failed for {peer.node_id}: {e}")
            return 0.0

    async def _measure_throughput(self, peer: PeerNode) -> float:
        """Measure network throughput to peer"""
        try:
            # Simple throughput test using small data transfer
            test_data = b"x" * 1024  # 1KB test data

            start_time = time.time()

            reader, writer = await asyncio.open_connection(peer.address, peer.port)

            # Send test data
            writer.write(test_data)
            await writer.drain()

            # Wait for echo response (if peer supports it)
            response = await asyncio.wait_for(reader.read(1024), timeout=2.0)

            transfer_time = time.time() - start_time

            writer.close()
            await writer.wait_closed()

            # Calculate throughput in Mbps
            bytes_transferred = len(test_data) + len(response)
            throughput_mbps = (bytes_transferred * 8) / (transfer_time * 1024 * 1024)

            return throughput_mbps

        except Exception as e:
            log_debug(f"Throughput measurement failed for {peer.node_id}: {e}")
            return 0.0

    def _calculate_health_score(self, latency: float, availability: float, throughput: float) -> float:
        """Calculate overall health score"""
        # Latency score (lower is better)
        latency_score = max(0.0, 1.0 - (latency / self.max_latency_ms))

        # Availability score
        availability_score = availability / 100.0

        # Throughput score (higher is better, normalized to 10 Mbps)
        throughput_score = min(1.0, throughput / 10.0)

        # Weighted average
        health_score = (
            latency_score * 0.3 +
            availability_score * 0.4 +
            throughput_score * 0.3
        )

        return health_score

    def _update_health_status(self, peer: PeerNode, status: NodeStatus, latency: float,
                            availability: float, throughput: float, error_rate: float,
                            consecutive_failures: int = 0, health_score: float = 0.0):
        """Update health status for peer"""
        self.health_status[peer.node_id] = HealthStatus(
            node_id=peer.node_id,
            status=status,
            last_check=time.time(),
            latency_ms=latency,
            availability_percent=availability,
            throughput_mbps=throughput,
            error_rate_percent=error_rate,
            consecutive_failures=consecutive_failures,
            health_score=health_score
        )

        # Update peer status in discovery
        peer.status = status
        peer.last_seen = time.time()

    def get_health_status(self, node_id: str) -> Optional[HealthStatus]:
        """Get health status for specific peer"""
        return self.health_status.get(node_id)

    def get_all_health_status(self) -> Dict[str, HealthStatus]:
        """Get health status for all peers"""
        return self.health_status.copy()

    def get_average_latency(self, node_id: str) -> Optional[float]:
        """Get average latency for peer"""
        node_key = f"{self.health_status.get(node_id, HealthStatus('', NodeStatus.OFFLINE, 0, 0, 0, 0, 0, 0, 0.0)).node_id}"

        if node_key in self.latency_history and self.latency_history[node_key]:
            return statistics.mean(self.latency_history[node_key])

        return None

    def get_healthy_peers(self) -> List[str]:
        """Get list of healthy peers"""
        return [
            node_id for node_id, status in self.health_status.items()
            if status.health_score >= self.min_health_score
        ]

    def get_unhealthy_peers(self) -> List[str]:
        """Get list of unhealthy peers"""
        return [
            node_id for node_id, status in self.health_status.items()
            if status.health_score < self.min_health_score
        ]

# Global health monitor
health_monitor: Optional[PeerHealthMonitor] = None

def get_health_monitor() -> Optional[PeerHealthMonitor]:
    """Get global health monitor"""
    return health_monitor

def create_health_monitor(check_interval: int = 60) -> PeerHealthMonitor:
    """Create and set global health monitor"""
    global health_monitor
    health_monitor = PeerHealthMonitor(check_interval)
    return health_monitor
EOF

    log_info "Peer health monitoring created"
}

# Function to create dynamic peer management
create_dynamic_peer_management() {
    log_info "Creating dynamic peer management..."

    cat > "$NETWORK_DIR/peers.py" << 'EOF'
"""
Dynamic Peer Management
Handles peer join/leave operations and connection management
"""

import asyncio
import time
from typing import Dict, List, Optional, Set
from dataclasses import dataclass
from enum import Enum

from .discovery import PeerNode, NodeStatus, P2PDiscovery
from .health import PeerHealthMonitor, HealthStatus

class PeerAction(Enum):
    JOIN = "join"
    LEAVE = "leave"
    DEMOTE = "demote"
    PROMOTE = "promote"
    BAN = "ban"

@dataclass
class PeerEvent:
    action: PeerAction
    node_id: str
    timestamp: float
    reason: str
    metadata: Dict

class DynamicPeerManager:
    """Manages dynamic peer connections and lifecycle"""

    def __init__(self, discovery: P2PDiscovery, health_monitor: PeerHealthMonitor):
        self.discovery = discovery
        self.health_monitor = health_monitor
        self.peer_events: List[PeerEvent] = []
        self.max_connections = 50
        self.min_connections = 8
        self.connection_retry_interval = 300  # 5 minutes
        self.ban_threshold = 0.1  # Reputation below this gets banned
        self.running = False

        # Peer management policies
        self.auto_reconnect = True
        self.auto_ban_malicious = True
        self.load_balance = True

    async def start_management(self):
        """Start peer management service"""
        self.running = True
        log_info("Starting dynamic peer management")

        while self.running:
            try:
                await self._manage_peer_connections()
                await self._enforce_peer_policies()
                await self._optimize_topology()
                await asyncio.sleep(30)  # Check every 30 seconds
            except Exception as e:
                log_error(f"Peer management error: {e}")
                await asyncio.sleep(10)

    async def stop_management(self):
        """Stop peer management service"""
        self.running = False
        log_info("Stopping dynamic peer management")

    async def _manage_peer_connections(self):
        """Manage peer connections based on current state"""
        current_peers = self.discovery.get_peer_count()

        if current_peers < self.min_connections:
            await self._discover_new_peers()
        elif current_peers > self.max_connections:
            await self._remove_excess_peers()

        # Reconnect to disconnected peers
        if self.auto_reconnect:
            await self._reconnect_disconnected_peers()

    async def _discover_new_peers(self):
        """Discover and connect to new peers"""
        log_info(f"Peer count ({self.discovery.get_peer_count()}) below minimum ({self.min_connections}), discovering new peers")

        # Request peer lists from existing connections
        for peer in self.discovery.get_peer_list():
            await self.discovery._request_peer_list(peer)

        # Try to connect to bootstrap nodes
        await self.discovery._connect_to_bootstrap_nodes()

    async def _remove_excess_peers(self):
        """Remove excess peers based on quality metrics"""
        log_info(f"Peer count ({self.discovery.get_peer_count()}) above maximum ({self.max_connections}), removing excess peers")

        peers = self.discovery.get_peer_list()

        # Sort peers by health score and reputation
        sorted_peers = sorted(
            peers,
            key=lambda p: (
                self.health_monitor.get_health_status(p.node_id).health_score if
                self.health_monitor.get_health_status(p.node_id) else 0.0,
                p.reputation
            )
        )

        # Remove lowest quality peers
        excess_count = len(peers) - self.max_connections
        for i in range(excess_count):
            peer_to_remove = sorted_peers[i]
            await self._remove_peer(peer_to_remove.node_id, "Excess peer removed")

    async def _reconnect_disconnected_peers(self):
        """Reconnect to peers that went offline"""
        # Get recently disconnected peers
        all_health = self.health_monitor.get_all_health_status()

        for node_id, health in all_health.items():
            if (health.status == NodeStatus.OFFLINE and
                time.time() - health.last_check < self.connection_retry_interval):

                # Try to reconnect
                peer = self.discovery.peers.get(node_id)
                if peer:
                    success = await self.discovery._connect_to_peer(peer.address, peer.port)
                    if success:
                        log_info(f"Reconnected to peer {node_id}")

    async def _enforce_peer_policies(self):
        """Enforce peer management policies"""
        if self.auto_ban_malicious:
            await self._ban_malicious_peers()

        await self._update_peer_reputations()

    async def _ban_malicious_peers(self):
        """Ban peers with malicious behavior"""
        for peer in self.discovery.get_peer_list():
            if peer.reputation < self.ban_threshold:
                await self._ban_peer(peer.node_id, "Reputation below threshold")

    async def _update_peer_reputations(self):
        """Update peer reputations based on health metrics"""
        for peer in self.discovery.get_peer_list():
            health = self.health_monitor.get_health_status(peer.node_id)

            if health:
                # Update reputation based on health score
                reputation_delta = (health.health_score - 0.5) * 0.1  # Small adjustments
                self.discovery.update_peer_reputation(peer.node_id, reputation_delta)

    async def _optimize_topology(self):
        """Optimize network topology for better performance"""
        if not self.load_balance:
            return

        peers = self.discovery.get_peer_list()
        healthy_peers = self.health_monitor.get_healthy_peers()

        # Prioritize connections to healthy peers
        for peer in peers:
            if peer.node_id not in healthy_peers:
                # Consider replacing unhealthy peer
                await self._consider_peer_replacement(peer)

    async def _consider_peer_replacement(self, unhealthy_peer: PeerNode):
        """Consider replacing unhealthy peer with better alternative"""
        # This would implement logic to find and connect to better peers
        # For now, just log the consideration
        log_info(f"Considering replacement for unhealthy peer {unhealthy_peer.node_id}")

    async def add_peer(self, address: str, port: int, public_key: str = "") -> bool:
        """Manually add a new peer"""
        try:
            success = await self.discovery._connect_to_peer(address, port)

            if success:
                # Record peer join event
                self._record_peer_event(PeerAction.JOIN, f"{address}:{port}", "Manual peer addition")
                log_info(f"Successfully added peer {address}:{port}")
                return True
            else:
                log_warn(f"Failed to add peer {address}:{port}")
                return False

        except Exception as e:
            log_error(f"Error adding peer {address}:{port}: {e}")
            return False

    async def remove_peer(self, node_id: str, reason: str = "Manual removal") -> bool:
        """Manually remove a peer"""
        return await self._remove_peer(node_id, reason)

    async def _remove_peer(self, node_id: str, reason: str) -> bool:
        """Remove peer from network"""
        try:
            if node_id in self.discovery.peers:
                peer = self.discovery.peers[node_id]

                # Close connection if open
                # This would be implemented with actual connection management

                # Remove from discovery
                del self.discovery.peers[node_id]

                # Remove from health monitoring
                if node_id in self.health_monitor.health_status:
                    del self.health_monitor.health_status[node_id]

                # Record peer leave event
                self._record_peer_event(PeerAction.LEAVE, node_id, reason)

                log_info(f"Removed peer {node_id}: {reason}")
                return True
            else:
                log_warn(f"Peer {node_id} not found for removal")
                return False

        except Exception as e:
            log_error(f"Error removing peer {node_id}: {e}")
            return False

    async def ban_peer(self, node_id: str, reason: str = "Banned by administrator") -> bool:
        """Ban a peer from the network"""
        return await self._ban_peer(node_id, reason)

    async def _ban_peer(self, node_id: str, reason: str) -> bool:
        """Ban peer and prevent reconnection"""
        success = await self._remove_peer(node_id, f"BANNED: {reason}")

        if success:
            # Record ban event
            self._record_peer_event(PeerAction.BAN, node_id, reason)

            # Add to ban list (would be persistent in real implementation)
            log_info(f"Banned peer {node_id}: {reason}")

        return success

    async def promote_peer(self, node_id: str) -> bool:
        """Promote peer to higher priority"""
        try:
            if node_id in self.discovery.peers:
                peer = self.discovery.peers[node_id]

                # Increase reputation
                self.discovery.update_peer_reputation(node_id, 0.1)

                # Record promotion event
                self._record_peer_event(PeerAction.PROMOTE, node_id, "Peer promoted")

                log_info(f"Promoted peer {node_id}")
                return True
            else:
                log_warn(f"Peer {node_id} not found for promotion")
                return False

        except Exception as e:
            log_error(f"Error promoting peer {node_id}: {e}")
            return False

    async def demote_peer(self, node_id: str) -> bool:
        """Demote peer to lower priority"""
        try:
            if node_id in self.discovery.peers:
                peer = self.discovery.peers[node_id]

                # Decrease reputation
                self.discovery.update_peer_reputation(node_id, -0.1)

                # Record demotion event
                self._record_peer_event(PeerAction.DEMOTE, node_id, "Peer demoted")

                log_info(f"Demoted peer {node_id}")
                return True
            else:
                log_warn(f"Peer {node_id} not found for demotion")
                return False

        except Exception as e:
            log_error(f"Error demoting peer {node_id}: {e}")
            return False

    def _record_peer_event(self, action: PeerAction, node_id: str, reason: str, metadata: Dict = None):
        """Record peer management event"""
        event = PeerEvent(
            action=action,
            node_id=node_id,
            timestamp=time.time(),
            reason=reason,
            metadata=metadata or {}
        )

        self.peer_events.append(event)

        # Limit event history size
        if len(self.peer_events) > 1000:
            self.peer_events = self.peer_events[-500:]  # Keep last 500 events

    def get_peer_events(self, node_id: Optional[str] = None, limit: int = 100) -> List[PeerEvent]:
        """Get peer management events"""
        events = self.peer_events

        if node_id:
            events = [e for e in events if e.node_id == node_id]

        return events[-limit:]

    def get_peer_statistics(self) -> Dict:
        """Get peer management statistics"""
        peers = self.discovery.get_peer_list()
        health_status = self.health_monitor.get_all_health_status()

        stats = {
            "total_peers": len(peers),
            "healthy_peers": len(self.health_monitor.get_healthy_peers()),
            "unhealthy_peers": len(self.health_monitor.get_unhealthy_peers()),
            "average_reputation": sum(p.reputation for p in peers) / len(peers) if peers else 0,
            "average_health_score": sum(h.health_score for h in health_status.values()) / len(health_status) if health_status else 0,
            "recent_events": len([e for e in self.peer_events if time.time() - e.timestamp < 3600])  # Last hour
        }

        return stats

# Global peer manager
peer_manager: Optional[DynamicPeerManager] = None

def get_peer_manager() -> Optional[DynamicPeerManager]:
    """Get global peer manager"""
    return peer_manager

def create_peer_manager(discovery: P2PDiscovery, health_monitor: PeerHealthMonitor) -> DynamicPeerManager:
    """Create and set global peer manager"""
    global peer_manager
    peer_manager = DynamicPeerManager(discovery, health_monitor)
    return peer_manager
EOF

    log_info "Dynamic peer management created"
}

# Function to create network topology optimization
create_topology_optimization() {
    log_info "Creating network topology optimization..."

    cat > "$NETWORK_DIR/topology.py" << 'EOF'
"""
Network Topology Optimization
Optimizes peer connection strategies for network performance
"""

import asyncio
import networkx as nx
import time
from typing import Dict, List, Set, Tuple, Optional
from dataclasses import dataclass
from enum import Enum

from .discovery import PeerNode, P2PDiscovery
from .health import PeerHealthMonitor, HealthStatus

class TopologyStrategy(Enum):
    SMALL_WORLD = "small_world"
    SCALE_FREE = "scale_free"
    MESH = "mesh"
    HYBRID = "hybrid"

@dataclass
class ConnectionWeight:
    source: str
    target: str
    weight: float
    latency: float
    bandwidth: float
    reliability: float

class NetworkTopology:
    """Manages and optimizes network topology"""

    def __init__(self, discovery: P2PDiscovery, health_monitor: PeerHealthMonitor):
        self.discovery = discovery
        self.health_monitor = health_monitor
        self.graph = nx.Graph()
        self.strategy = TopologyStrategy.HYBRID
        self.optimization_interval = 300  # 5 minutes
        self.max_degree = 8
        self.min_degree = 3
        self.running = False

        # Topology metrics
        self.avg_path_length = 0
        self.clustering_coefficient = 0
        self.network_efficiency = 0

    async def start_optimization(self):
        """Start topology optimization service"""
        self.running = True
        log_info("Starting network topology optimization")

        # Initialize graph
        await self._build_initial_graph()

        while self.running:
            try:
                await self._optimize_topology()
                await self._calculate_metrics()
                await asyncio.sleep(self.optimization_interval)
            except Exception as e:
                log_error(f"Topology optimization error: {e}")
                await asyncio.sleep(30)

    async def stop_optimization(self):
        """Stop topology optimization service"""
        self.running = False
        log_info("Stopping network topology optimization")

    async def _build_initial_graph(self):
        """Build initial network graph from current peers"""
        self.graph.clear()

        # Add all peers as nodes
        for peer in self.discovery.get_peer_list():
            self.graph.add_node(peer.node_id, **{
                'address': peer.address,
                'port': peer.port,
                'reputation': peer.reputation,
                'capabilities': peer.capabilities
            })

        # Add edges based on current connections
        await self._add_connection_edges()

    async def _add_connection_edges(self):
        """Add edges for current peer connections"""
        peers = self.discovery.get_peer_list()

        # In a real implementation, this would use actual connection data
        # For now, create a mesh topology
        for i, peer1 in enumerate(peers):
            for peer2 in peers[i+1:]:
                if self._should_connect(peer1, peer2):
                    weight = await self._calculate_connection_weight(peer1, peer2)
                    self.graph.add_edge(peer1.node_id, peer2.node_id, weight=weight)

    def _should_connect(self, peer1: PeerNode, peer2: PeerNode) -> bool:
        """Determine if two peers should be connected"""
        # Check degree constraints
        if (self.graph.degree(peer1.node_id) >= self.max_degree or
            self.graph.degree(peer2.node_id) >= self.max_degree):
            return False

        # Check strategy-specific rules
        if self.strategy == TopologyStrategy.SMALL_WORLD:
            return self._small_world_should_connect(peer1, peer2)
        elif self.strategy == TopologyStrategy.SCALE_FREE:
            return self._scale_free_should_connect(peer1, peer2)
        elif self.strategy == TopologyStrategy.MESH:
            return self._mesh_should_connect(peer1, peer2)
        elif self.strategy == TopologyStrategy.HYBRID:
            return self._hybrid_should_connect(peer1, peer2)

        return False

    def _small_world_should_connect(self, peer1: PeerNode, peer2: PeerNode) -> bool:
        """Small world topology connection logic"""
        # Connect to nearby peers and some random long-range connections
        import random

        if random.random() < 0.1:  # 10% random connections
            return True

        # Connect based on geographic or network proximity (simplified)
        return random.random() < 0.3  # 30% of nearby connections

    def _scale_free_should_connect(self, peer1: PeerNode, peer2: PeerNode) -> bool:
        """Scale-free topology connection logic"""
        # Prefer connecting to high-degree nodes (rich-get-richer)
        degree1 = self.graph.degree(peer1.node_id)
        degree2 = self.graph.degree(peer2.node_id)

        # Higher probability for nodes with higher degree
        connection_probability = (degree1 + degree2) / (2 * self.max_degree)
        return random.random() < connection_probability

    def _mesh_should_connect(self, peer1: PeerNode, peer2: PeerNode) -> bool:
        """Full mesh topology connection logic"""
        # Connect to all peers (within degree limits)
        return True

    def _hybrid_should_connect(self, peer1: PeerNode, peer2: PeerNode) -> bool:
        """Hybrid topology connection logic"""
        # Combine multiple strategies
        import random

        # 40% small world, 30% scale-free, 30% mesh
        strategy_choice = random.random()

        if strategy_choice < 0.4:
            return self._small_world_should_connect(peer1, peer2)
        elif strategy_choice < 0.7:
            return self._scale_free_should_connect(peer1, peer2)
        else:
            return self._mesh_should_connect(peer1, peer2)

    async def _calculate_connection_weight(self, peer1: PeerNode, peer2: PeerNode) -> float:
        """Calculate connection weight between two peers"""
        # Get health metrics
        health1 = self.health_monitor.get_health_status(peer1.node_id)
        health2 = self.health_monitor.get_health_status(peer2.node_id)

        # Calculate weight based on health, reputation, and performance
        weight = 1.0

        if health1 and health2:
            # Factor in health scores
            weight *= (health1.health_score + health2.health_score) / 2

        # Factor in reputation
        weight *= (peer1.reputation + peer2.reputation) / 2

        # Factor in latency (inverse relationship)
        if health1 and health1.latency_ms > 0:
            weight *= min(1.0, 1000 / health1.latency_ms)

        return max(0.1, weight)  # Minimum weight of 0.1

    async def _optimize_topology(self):
        """Optimize network topology"""
        log_info("Optimizing network topology")

        # Analyze current topology
        await self._analyze_topology()

        # Identify optimization opportunities
        improvements = await self._identify_improvements()

        # Apply improvements
        for improvement in improvements:
            await self._apply_improvement(improvement)

    async def _analyze_topology(self):
        """Analyze current network topology"""
        if len(self.graph.nodes()) == 0:
            return

        # Calculate basic metrics
        if nx.is_connected(self.graph):
            self.avg_path_length = nx.average_shortest_path_length(self.graph, weight='weight')
        else:
            self.avg_path_length = float('inf')

        self.clustering_coefficient = nx.average_clustering(self.graph)

        # Calculate network efficiency
        self.network_efficiency = nx.global_efficiency(self.graph)

        log_info(f"Topology metrics - Path length: {self.avg_path_length:.2f}, "
                f"Clustering: {self.clustering_coefficient:.2f}, "
                f"Efficiency: {self.network_efficiency:.2f}")

    async def _identify_improvements(self) -> List[Dict]:
        """Identify topology improvements"""
        improvements = []

        # Check for disconnected nodes
        if not nx.is_connected(self.graph):
            components = list(nx.connected_components(self.graph))
            if len(components) > 1:
                improvements.append({
                    'type': 'connect_components',
                    'components': components
                })

        # Check degree distribution
        degrees = dict(self.graph.degree())
        low_degree_nodes = [node for node, degree in degrees.items() if degree < self.min_degree]
        high_degree_nodes = [node for node, degree in degrees.items() if degree > self.max_degree]

        if low_degree_nodes:
            improvements.append({
                'type': 'increase_degree',
                'nodes': low_degree_nodes
            })

        if high_degree_nodes:
            improvements.append({
                'type': 'decrease_degree',
                'nodes': high_degree_nodes
            })

        # Check for inefficient paths
        if self.avg_path_length > 6:  # Too many hops
            improvements.append({
                'type': 'add_shortcuts',
                'target_path_length': 4
            })

        return improvements

    async def _apply_improvement(self, improvement: Dict):
        """Apply topology improvement"""
        improvement_type = improvement['type']

        if improvement_type == 'connect_components':
            await self._connect_components(improvement['components'])
        elif improvement_type == 'increase_degree':
            await self._increase_node_degree(improvement['nodes'])
        elif improvement_type == 'decrease_degree':
            await self._decrease_node_degree(improvement['nodes'])
        elif improvement_type == 'add_shortcuts':
            await self._add_shortcuts(improvement['target_path_length'])

    async def _connect_components(self, components: List[Set[str]]):
        """Connect disconnected components"""
        log_info(f"Connecting {len(components)} disconnected components")

        # Connect components by adding edges between representative nodes
        for i in range(len(components) - 1):
            component1 = list(components[i])
            component2 = list(components[i + 1])

            # Select best nodes to connect
            node1 = self._select_best_connection_node(component1)
            node2 = self._select_best_connection_node(component2)

            # Add connection
            if node1 and node2:
                peer1 = self.discovery.peers.get(node1)
                peer2 = self.discovery.peers.get(node2)

                if peer1 and peer2:
                    await self._establish_connection(peer1, peer2)

    async def _increase_node_degree(self, nodes: List[str]):
        """Increase degree of low-degree nodes"""
        for node_id in nodes:
            peer = self.discovery.peers.get(node_id)
            if not peer:
                continue

            # Find best candidates for connection
            candidates = await self._find_connection_candidates(peer, max_connections=2)

            for candidate_peer in candidates:
                await self._establish_connection(peer, candidate_peer)

    async def _decrease_node_degree(self, nodes: List[str]):
        """Decrease degree of high-degree nodes"""
        for node_id in nodes:
            # Remove lowest quality connections
            edges = list(self.graph.edges(node_id, data=True))

            # Sort by weight (lowest first)
            edges.sort(key=lambda x: x[2].get('weight', 1.0))

            # Remove excess connections
            excess_count = self.graph.degree(node_id) - self.max_degree
            for i in range(min(excess_count, len(edges))):
                edge = edges[i]
                await self._remove_connection(edge[0], edge[1])

    async def _add_shortcuts(self, target_path_length: float):
        """Add shortcut connections to reduce path length"""
        # Find pairs of nodes with long shortest paths
        all_pairs = dict(nx.all_pairs_shortest_path_length(self.graph))

        long_paths = []
        for node1, paths in all_pairs.items():
            for node2, distance in paths.items():
                if node1 != node2 and distance > target_path_length:
                    long_paths.append((node1, node2, distance))

        # Sort by path length (longest first)
        long_paths.sort(key=lambda x: x[2], reverse=True)

        # Add shortcuts for longest paths
        for node1_id, node2_id, _ in long_paths[:5]:  # Limit to 5 shortcuts
            peer1 = self.discovery.peers.get(node1_id)
            peer2 = self.discovery.peers.get(node2_id)

            if peer1 and peer2 and not self.graph.has_edge(node1_id, node2_id):
                await self._establish_connection(peer1, peer2)

    def _select_best_connection_node(self, nodes: List[str]) -> Optional[str]:
        """Select best node for inter-component connection"""
        best_node = None
        best_score = 0

        for node_id in nodes:
            peer = self.discovery.peers.get(node_id)
            if not peer:
                continue

            # Score based on reputation and health
            health = self.health_monitor.get_health_status(node_id)
            score = peer.reputation

            if health:
                score *= health.health_score

            if score > best_score:
                best_score = score
                best_node = node_id

        return best_node

    async def _find_connection_candidates(self, peer: PeerNode, max_connections: int = 3) -> List[PeerNode]:
        """Find best candidates for new connections"""
        candidates = []

        for candidate_peer in self.discovery.get_peer_list():
            if (candidate_peer.node_id == peer.node_id or
                self.graph.has_edge(peer.node_id, candidate_peer.node_id)):
                continue

            # Score candidate
            score = await self._calculate_connection_weight(peer, candidate_peer)
            candidates.append((candidate_peer, score))

        # Sort by score and return top candidates
        candidates.sort(key=lambda x: x[1], reverse=True)
        return [candidate for candidate, _ in candidates[:max_connections]]

    async def _establish_connection(self, peer1: PeerNode, peer2: PeerNode):
        """Establish connection between two peers"""
        try:
            # In a real implementation, this would establish actual network connection
            weight = await self._calculate_connection_weight(peer1, peer2)

            self.graph.add_edge(peer1.node_id, peer2.node_id, weight=weight)

            log_info(f"Established connection between {peer1.node_id} and {peer2.node_id}")

        except Exception as e:
            log_error(f"Failed to establish connection between {peer1.node_id} and {peer2.node_id}: {e}")

    async def _remove_connection(self, node1_id: str, node2_id: str):
        """Remove connection between two nodes"""
        try:
            if self.graph.has_edge(node1_id, node2_id):
                self.graph.remove_edge(node1_id, node2_id)
                log_info(f"Removed connection between {node1_id} and {node2_id}")
        except Exception as e:
            log_error(f"Failed to remove connection between {node1_id} and {node2_id}: {e}")

    def get_topology_metrics(self) -> Dict:
        """Get current topology metrics"""
        return {
            'node_count': len(self.graph.nodes()),
            'edge_count': len(self.graph.edges()),
            'avg_degree': sum(dict(self.graph.degree()).values()) / len(self.graph.nodes()) if self.graph.nodes() else 0,
            'avg_path_length': self.avg_path_length,
            'clustering_coefficient': self.clustering_coefficient,
            'network_efficiency': self.network_efficiency,
            'is_connected': nx.is_connected(self.graph),
            'strategy': self.strategy.value
        }

    def get_visualization_data(self) -> Dict:
        """Get data for network visualization"""
        nodes = []
        edges = []

        for node_id in self.graph.nodes():
            node_data = self.graph.nodes[node_id]
            peer = self.discovery.peers.get(node_id)

            nodes.append({
                'id': node_id,
                'address': node_data.get('address', ''),
                'reputation': node_data.get('reputation', 0),
                'degree': self.graph.degree(node_id)
            })

        for edge in self.graph.edges(data=True):
            edges.append({
                'source': edge[0],
                'target': edge[1],
                'weight': edge[2].get('weight', 1.0)
            })

        return {
            'nodes': nodes,
            'edges': edges
        }

# Global topology manager
topology_manager: Optional[NetworkTopology] = None

def get_topology_manager() -> Optional[NetworkTopology]:
    """Get global topology manager"""
    return topology_manager

def create_topology_manager(discovery: P2PDiscovery, health_monitor: PeerHealthMonitor) -> NetworkTopology:
    """Create and set global topology manager"""
    global topology_manager
    topology_manager = NetworkTopology(discovery, health_monitor)
    return topology_manager
EOF

    log_info "Network topology optimization created"
}

# Function to create network partition handling
create_partition_handling() {
    log_info "Creating network partition handling..."

    cat > "$NETWORK_DIR/partition.py" << 'EOF'
"""
Network Partition Detection and Recovery
Handles network split detection and automatic recovery
"""

import asyncio
import time
from typing import Dict, List, Set, Optional, Tuple
from dataclasses import dataclass
from enum import Enum

from .discovery import P2PDiscovery, PeerNode, NodeStatus
from .health import PeerHealthMonitor, HealthStatus

class PartitionState(Enum):
    HEALTHY = "healthy"
    PARTITIONED = "partitioned"
    RECOVERING = "recovering"
    ISOLATED = "isolated"

@dataclass
class PartitionInfo:
    partition_id: str
    nodes: Set[str]
    leader: Optional[str]
    size: int
    created_at: float
    last_seen: float

class NetworkPartitionManager:
    """Manages network partition detection and recovery"""

    def __init__(self, discovery: P2PDiscovery, health_monitor: PeerHealthMonitor):
        self.discovery = discovery
        self.health_monitor = health_monitor
        self.current_state = PartitionState.HEALTHY
        self.partitions: Dict[str, PartitionInfo] = {}
        self.local_partition_id = None
        self.detection_interval = 30  # seconds
        self.recovery_timeout = 300  # 5 minutes
        self.max_partition_size = 0.4  # Max 40% of network in one partition
        self.running = False

        # Partition detection thresholds
        self.min_connected_nodes = 3
        self.partition_detection_threshold = 0.3  # 30% of network unreachable

    async def start_partition_monitoring(self):
        """Start partition monitoring service"""
        self.running = True
        log_info("Starting network partition monitoring")

        while self.running:
            try:
                await self._detect_partitions()
                await self._handle_partitions()
                await asyncio.sleep(self.detection_interval)
            except Exception as e:
                log_error(f"Partition monitoring error: {e}")
                await asyncio.sleep(10)

    async def stop_partition_monitoring(self):
        """Stop partition monitoring service"""
        self.running = False
        log_info("Stopping network partition monitoring")

    async def _detect_partitions(self):
        """Detect network partitions"""
        current_peers = self.discovery.get_peer_list()
        total_nodes = len(current_peers) + 1  # +1 for local node

        # Check connectivity
        reachable_nodes = set()
        unreachable_nodes = set()

        for peer in current_peers:
            health = self.health_monitor.get_health_status(peer.node_id)
            if health and health.status == NodeStatus.ONLINE:
                reachable_nodes.add(peer.node_id)
            else:
                unreachable_nodes.add(peer.node_id)

        # Calculate partition metrics
        reachable_ratio = len(reachable_nodes) / total_nodes if total_nodes > 0 else 0

        log_info(f"Network connectivity: {len(reachable_nodes)}/{total_nodes} reachable ({reachable_ratio:.2%})")

        # Detect partition
        if reachable_ratio < (1 - self.partition_detection_threshold):
            await self._handle_partition_detected(reachable_nodes, unreachable_nodes)
        else:
            await self._handle_partition_healed()

    async def _handle_partition_detected(self, reachable_nodes: Set[str], unreachable_nodes: Set[str]):
        """Handle detected network partition"""
        if self.current_state == PartitionState.HEALTHY:
            log_warn(f"Network partition detected! Reachable: {len(reachable_nodes)}, Unreachable: {len(unreachable_nodes)}")
            self.current_state = PartitionState.PARTITIONED

            # Create partition info
            partition_id = self._generate_partition_id(reachable_nodes)
            self.local_partition_id = partition_id

            self.partitions[partition_id] = PartitionInfo(
                partition_id=partition_id,
                nodes=reachable_nodes.copy(),
                leader=None,
                size=len(reachable_nodes),
                created_at=time.time(),
                last_seen=time.time()
            )

            # Start recovery procedures
            asyncio.create_task(self._start_partition_recovery())

    async def _handle_partition_healed(self):
        """Handle healed network partition"""
        if self.current_state in [PartitionState.PARTITIONED, PartitionState.RECOVERING]:
            log_info("Network partition healed!")
            self.current_state = PartitionState.HEALTHY

            # Clear partition info
            self.partitions.clear()
            self.local_partition_id = None

    async def _handle_partitions(self):
        """Handle active partitions"""
        if self.current_state == PartitionState.PARTITIONED:
            await self._maintain_partition()
        elif self.current_state == PartitionState.RECOVERING:
            await self._monitor_recovery()

    async def _maintain_partition(self):
        """Maintain operations during partition"""
        if not self.local_partition_id:
            return

        partition = self.partitions.get(self.local_partition_id)
        if not partition:
            return

        # Update partition info
        current_peers = set(peer.node_id for peer in self.discovery.get_peer_list())
        partition.nodes = current_peers
        partition.last_seen = time.time()
        partition.size = len(current_peers)

        # Select leader if none exists
        if not partition.leader:
            partition.leader = self._select_partition_leader(current_peers)
            log_info(f"Selected partition leader: {partition.leader}")

    async def _start_partition_recovery(self):
        """Start partition recovery procedures"""
        log_info("Starting partition recovery procedures")

        recovery_tasks = [
            asyncio.create_task(self._attempt_reconnection()),
            asyncio.create_task(self._bootstrap_from_known_nodes()),
            asyncio.create_task(self._coordinate_with_other_partitions())
        ]

        try:
            await asyncio.gather(*recovery_tasks, return_exceptions=True)
        except Exception as e:
            log_error(f"Partition recovery error: {e}")

    async def _attempt_reconnection(self):
        """Attempt to reconnect to unreachable nodes"""
        if not self.local_partition_id:
            return

        partition = self.partitions[self.local_partition_id]

        # Try to reconnect to known unreachable nodes
        all_known_peers = self.discovery.peers.copy()

        for node_id, peer in all_known_peers.items():
            if node_id not in partition.nodes:
                # Try to reconnect
                success = await self.discovery._connect_to_peer(peer.address, peer.port)

                if success:
                    log_info(f"Reconnected to node {node_id} during partition recovery")

    async def _bootstrap_from_known_nodes(self):
        """Bootstrap network from known good nodes"""
        # Try to connect to bootstrap nodes
        for address, port in self.discovery.bootstrap_nodes:
            try:
                success = await self.discovery._connect_to_peer(address, port)
                if success:
                    log_info(f"Bootstrap successful to {address}:{port}")
                    break
            except Exception as e:
                log_debug(f"Bootstrap failed to {address}:{port}: {e}")

    async def _coordinate_with_other_partitions(self):
        """Coordinate with other partitions (if detectable)"""
        # In a real implementation, this would use partition detection protocols
        # For now, just log the attempt
        log_info("Attempting to coordinate with other partitions")

    async def _monitor_recovery(self):
        """Monitor partition recovery progress"""
        if not self.local_partition_id:
            return

        partition = self.partitions[self.local_partition_id]

        # Check if recovery is taking too long
        if time.time() - partition.created_at > self.recovery_timeout:
            log_warn("Partition recovery timeout, considering extended recovery strategies")
            await self._extended_recovery_strategies()

    async def _extended_recovery_strategies(self):
        """Implement extended recovery strategies"""
        # Try alternative discovery methods
        await self._alternative_discovery()

        # Consider network reconfiguration
        await self._network_reconfiguration()

    async def _alternative_discovery(self):
        """Try alternative peer discovery methods"""
        log_info("Trying alternative discovery methods")

        # Try DNS-based discovery
        await self._dns_discovery()

        # Try multicast discovery
        await self._multicast_discovery()

    async def _dns_discovery(self):
        """DNS-based peer discovery"""
        # In a real implementation, this would query DNS records
        log_debug("Attempting DNS-based discovery")

    async def _multicast_discovery(self):
        """Multicast-based peer discovery"""
        # In a real implementation, this would use multicast packets
        log_debug("Attempting multicast discovery")

    async def _network_reconfiguration(self):
        """Reconfigure network for partition resilience"""
        log_info("Reconfiguring network for partition resilience")

        # Increase connection retry intervals
        # Adjust topology for better fault tolerance
        # Enable alternative communication channels

    def _generate_partition_id(self, nodes: Set[str]) -> str:
        """Generate unique partition ID"""
        import hashlib

        sorted_nodes = sorted(nodes)
        content = "|".join(sorted_nodes)
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def _select_partition_leader(self, nodes: Set[str]) -> Optional[str]:
        """Select leader for partition"""
        if not nodes:
            return None

        # Select node with highest reputation
        best_node = None
        best_reputation = 0

        for node_id in nodes:
            peer = self.discovery.peers.get(node_id)
            if peer and peer.reputation > best_reputation:
                best_reputation = peer.reputation
                best_node = node_id

        return best_node

    def get_partition_status(self) -> Dict:
        """Get current partition status"""
        return {
            'state': self.current_state.value,
            'local_partition_id': self.local_partition_id,
            'partition_count': len(self.partitions),
            'partitions': {
                pid: {
                    'size': info.size,
                    'leader': info.leader,
                    'created_at': info.created_at,
                    'last_seen': info.last_seen
                }
                for pid, info in self.partitions.items()
            }
        }

    def is_partitioned(self) -> bool:
        """Check if network is currently partitioned"""
        return self.current_state in [PartitionState.PARTITIONED, PartitionState.RECOVERING]

    def get_local_partition_size(self) -> int:
        """Get size of local partition"""
        if not self.local_partition_id:
            return 0

        partition = self.partitions.get(self.local_partition_id)
        return partition.size if partition else 0

# Global partition manager
partition_manager: Optional[NetworkPartitionManager] = None

def get_partition_manager() -> Optional[NetworkPartitionManager]:
    """Get global partition manager"""
    return partition_manager

def create_partition_manager(discovery: P2PDiscovery, health_monitor: PeerHealthMonitor) -> NetworkPartitionManager:
    """Create and set global partition manager"""
    global partition_manager
    partition_manager = NetworkPartitionManager(discovery, health_monitor)
    return partition_manager
EOF

    log_info "Network partition handling created"
}

# Function to create network recovery mechanisms
create_recovery_mechanisms() {
    log_info "Creating network recovery mechanisms..."

    cat > "$NETWORK_DIR/recovery.py" << 'EOF'
"""
Network Recovery Mechanisms
Implements automatic network healing and recovery procedures
"""

import asyncio
import time
from typing import Dict, List, Optional, Set
from dataclasses import dataclass
from enum import Enum

from .discovery import P2PDiscovery, PeerNode
from .health import PeerHealthMonitor
from .partition import NetworkPartitionManager, PartitionState

class RecoveryStrategy(Enum):
    AGGRESSIVE = "aggressive"
    CONSERVATIVE = "conservative"
    ADAPTIVE = "adaptive"

class RecoveryTrigger(Enum):
    PARTITION_DETECTED = "partition_detected"
    HIGH_LATENCY = "high_latency"
    PEER_FAILURE = "peer_failure"
    MANUAL = "manual"

@dataclass
class RecoveryAction:
    action_type: str
    target_node: str
    priority: int
    created_at: float
    attempts: int
    max_attempts: int
    success: bool

class NetworkRecoveryManager:
    """Manages automatic network recovery procedures"""

    def __init__(self, discovery: P2PDiscovery, health_monitor: PeerHealthMonitor,
                 partition_manager: NetworkPartitionManager):
        self.discovery = discovery
        self.health_monitor = health_monitor
        self.partition_manager = partition_manager
        self.recovery_strategy = RecoveryStrategy.ADAPTIVE
        self.recovery_actions: List[RecoveryAction] = []
        self.running = False
        self.recovery_interval = 60  # seconds

        # Recovery parameters
        self.max_recovery_attempts = 3
        self.recovery_timeout = 300  # 5 minutes
        self.emergency_threshold = 0.1  # 10% of network remaining

    async def start_recovery_service(self):
        """Start network recovery service"""
        self.running = True
        log_info("Starting network recovery service")

        while self.running:
            try:
                await self._process_recovery_actions()
                await self._monitor_network_health()
                await self._adaptive_strategy_adjustment()
                await asyncio.sleep(self.recovery_interval)
            except Exception as e:
                log_error(f"Recovery service error: {e}")
                await asyncio.sleep(10)

    async def stop_recovery_service(self):
        """Stop network recovery service"""
        self.running = False
        log_info("Stopping network recovery service")

    async def trigger_recovery(self, trigger: RecoveryTrigger, target_node: Optional[str] = None,
                             metadata: Dict = None):
        """Trigger recovery procedure"""
        log_info(f"Recovery triggered: {trigger.value}")

        if trigger == RecoveryTrigger.PARTITION_DETECTED:
            await self._handle_partition_recovery()
        elif trigger == RecoveryTrigger.HIGH_LATENCY:
            await self._handle_latency_recovery(target_node)
        elif trigger == RecoveryTrigger.PEER_FAILURE:
            await self._handle_peer_failure_recovery(target_node)
        elif trigger == RecoveryTrigger.MANUAL:
            await self._handle_manual_recovery(target_node, metadata)

    async def _handle_partition_recovery(self):
        """Handle partition recovery"""
        log_info("Starting partition recovery")

        # Get partition status
        partition_status = self.partition_manager.get_partition_status()

        if partition_status['state'] == PartitionState.PARTITIONED.value:
            # Create recovery actions for partition
            await self._create_partition_recovery_actions(partition_status)

    async def _create_partition_recovery_actions(self, partition_status: Dict):
        """Create recovery actions for partition"""
        local_partition_size = self.partition_manager.get_local_partition_size()

        # Emergency recovery if partition is too small
        if local_partition_size < len(self.discovery.peers) * self.emergency_threshold:
            await self._create_emergency_recovery_actions()
        else:
            await self._create_standard_recovery_actions()

    async def _create_emergency_recovery_actions(self):
        """Create emergency recovery actions"""
        log_warn("Creating emergency recovery actions")

        # Try all bootstrap nodes
        for address, port in self.discovery.bootstrap_nodes:
            action = RecoveryAction(
                action_type="bootstrap_connect",
                target_node=f"{address}:{port}",
                priority=1,  # Highest priority
                created_at=time.time(),
                attempts=0,
                max_attempts=5,
                success=False
            )
            self.recovery_actions.append(action)

        # Try alternative discovery methods
        action = RecoveryAction(
            action_type="alternative_discovery",
            target_node="broadcast",
            priority=2,
            created_at=time.time(),
            attempts=0,
            max_attempts=3,
            success=False
        )
        self.recovery_actions.append(action)

    async def _create_standard_recovery_actions(self):
        """Create standard recovery actions"""
        # Reconnect to recently lost peers
        health_status = self.health_monitor.get_all_health_status()

        for node_id, health in health_status.items():
            if health.status.value == "offline":
                peer = self.discovery.peers.get(node_id)
                if peer:
                    action = RecoveryAction(
                        action_type="reconnect_peer",
                        target_node=node_id,
                        priority=3,
                        created_at=time.time(),
                        attempts=0,
                        max_attempts=3,
                        success=False
                    )
                    self.recovery_actions.append(action)

    async def _handle_latency_recovery(self, target_node: str):
        """Handle high latency recovery"""
        log_info(f"Starting latency recovery for node {target_node}")

        # Find alternative paths
        action = RecoveryAction(
            action_type="find_alternative_path",
            target_node=target_node,
            priority=4,
            created_at=time.time(),
            attempts=0,
            max_attempts=2,
            success=False
        )
        self.recovery_actions.append(action)

    async def _handle_peer_failure_recovery(self, target_node: str):
        """Handle peer failure recovery"""
        log_info(f"Starting peer failure recovery for node {target_node}")

        # Replace failed peer
        action = RecoveryAction(
            action_type="replace_peer",
            target_node=target_node,
            priority=3,
            created_at=time.time(),
            attempts=0,
            max_attempts=3,
            success=False
        )
        self.recovery_actions.append(action)

    async def _handle_manual_recovery(self, target_node: Optional[str], metadata: Dict):
        """Handle manual recovery"""
        recovery_type = metadata.get('type', 'standard')

        if recovery_type == 'force_reconnect':
            await self._force_reconnect(target_node)
        elif recovery_type == 'reset_network':
            await self._reset_network()
        elif recovery_type == 'bootstrap_only':
            await self._bootstrap_only_recovery()

    async def _process_recovery_actions(self):
        """Process pending recovery actions"""
        # Sort actions by priority
        sorted_actions = sorted(
            [a for a in self.recovery_actions if not a.success],
            key=lambda x: x.priority
        )

        for action in sorted_actions[:5]:  # Process max 5 actions per cycle
            if action.attempts >= action.max_attempts:
                # Mark as failed and remove
                log_warn(f"Recovery action failed after {action.attempts} attempts: {action.action_type}")
                self.recovery_actions.remove(action)
                continue

            # Execute action
            success = await self._execute_recovery_action(action)

            if success:
                action.success = True
                log_info(f"Recovery action succeeded: {action.action_type}")
            else:
                action.attempts += 1
                log_debug(f"Recovery action attempt {action.attempts} failed: {action.action_type}")

    async def _execute_recovery_action(self, action: RecoveryAction) -> bool:
        """Execute individual recovery action"""
        try:
            if action.action_type == "bootstrap_connect":
                return await self._execute_bootstrap_connect(action)
            elif action.action_type == "alternative_discovery":
                return await self._execute_alternative_discovery(action)
            elif action.action_type == "reconnect_peer":
                return await self._execute_reconnect_peer(action)
            elif action.action_type == "find_alternative_path":
                return await self._execute_find_alternative_path(action)
            elif action.action_type == "replace_peer":
                return await self._execute_replace_peer(action)
            else:
                log_warn(f"Unknown recovery action type: {action.action_type}")
                return False

        except Exception as e:
            log_error(f"Error executing recovery action {action.action_type}: {e}")
            return False

    async def _execute_bootstrap_connect(self, action: RecoveryAction) -> bool:
        """Execute bootstrap connect action"""
        address, port = action.target_node.split(':')

        try:
            success = await self.discovery._connect_to_peer(address, int(port))
            if success:
                log_info(f"Bootstrap connect successful to {address}:{port}")
            return success
        except Exception as e:
            log_error(f"Bootstrap connect failed to {address}:{port}: {e}")
            return False

    async def _execute_alternative_discovery(self) -> bool:
        """Execute alternative discovery action"""
        try:
            # Try multicast discovery
            await self._multicast_discovery()

            # Try DNS discovery
            await self._dns_discovery()

            # Check if any new peers were discovered
            new_peers = len(self.discovery.get_peer_list())
            return new_peers > 0

        except Exception as e:
            log_error(f"Alternative discovery failed: {e}")
            return False

    async def _execute_reconnect_peer(self, action: RecoveryAction) -> bool:
        """Execute peer reconnection action"""
        peer = self.discovery.peers.get(action.target_node)
        if not peer:
            return False

        try:
            success = await self.discovery._connect_to_peer(peer.address, peer.port)
            if success:
                log_info(f"Reconnected to peer {action.target_node}")
            return success
        except Exception as e:
            log_error(f"Reconnection failed for peer {action.target_node}: {e}")
            return False

    async def _execute_find_alternative_path(self, action: RecoveryAction) -> bool:
        """Execute alternative path finding action"""
        # This would implement finding alternative network paths
        # For now, just try to reconnect through different peers
        log_info(f"Finding alternative path for node {action.target_node}")

        # Try connecting through other peers
        for peer in self.discovery.get_peer_list():
            if peer.node_id != action.target_node:
                # In a real implementation, this would route through the peer
                success = await self.discovery._connect_to_peer(peer.address, peer.port)
                if success:
                    return True

        return False

    async def _execute_replace_peer(self, action: RecoveryAction) -> bool:
        """Execute peer replacement action"""
        log_info(f"Attempting to replace peer {action.target_node}")

        # Find replacement peer
        replacement = await self._find_replacement_peer()

        if replacement:
            # Remove failed peer
            await self.discovery._remove_peer(action.target_node, "Peer replacement")

            # Add replacement peer
            success = await self.discovery._connect_to_peer(replacement[0], replacement[1])

            if success:
                log_info(f"Successfully replaced peer {action.target_node} with {replacement[0]}:{replacement[1]}")
                return True

        return False

    async def _find_replacement_peer(self) -> Optional[Tuple[str, int]]:
        """Find replacement peer from known sources"""
        # Try bootstrap nodes first
        for address, port in self.discovery.bootstrap_nodes:
            peer_id = f"{address}:{port}"
            if peer_id not in self.discovery.peers:
                return (address, port)

        return None

    async def _monitor_network_health(self):
        """Monitor network health for recovery triggers"""
        # Check for high latency
        health_status = self.health_monitor.get_all_health_status()

        for node_id, health in health_status.items():
            if health.latency_ms > 2000:  # 2 seconds
                await self.trigger_recovery(RecoveryTrigger.HIGH_LATENCY, node_id)

    async def _adaptive_strategy_adjustment(self):
        """Adjust recovery strategy based on network conditions"""
        if self.recovery_strategy != RecoveryStrategy.ADAPTIVE:
            return

        # Count recent failures
        recent_failures = len([
            action for action in self.recovery_actions
            if not action.success and time.time() - action.created_at < 300
        ])

        # Adjust strategy based on failure rate
        if recent_failures > 10:
            self.recovery_strategy = RecoveryStrategy.CONSERVATIVE
            log_info("Switching to conservative recovery strategy")
        elif recent_failures < 3:
            self.recovery_strategy = RecoveryStrategy.AGGRESSIVE
            log_info("Switching to aggressive recovery strategy")

    async def _force_reconnect(self, target_node: Optional[str]):
        """Force reconnection to specific node or all nodes"""
        if target_node:
            peer = self.discovery.peers.get(target_node)
            if peer:
                await self.discovery._connect_to_peer(peer.address, peer.port)
        else:
            # Reconnect to all peers
            for peer in self.discovery.get_peer_list():
                await self.discovery._connect_to_peer(peer.address, peer.port)

    async def _reset_network(self):
        """Reset network connections"""
        log_warn("Resetting network connections")

        # Clear all peers
        self.discovery.peers.clear()

        # Restart discovery
        await self.discovery._connect_to_bootstrap_nodes()

    async def _bootstrap_only_recovery(self):
        """Recover using bootstrap nodes only"""
        log_info("Starting bootstrap-only recovery")

        # Clear current peers
        self.discovery.peers.clear()

        # Connect only to bootstrap nodes
        for address, port in self.discovery.bootstrap_nodes:
            await self.discovery._connect_to_peer(address, port)

    async def _multicast_discovery(self):
        """Multicast discovery implementation"""
        # Implementation would use UDP multicast
        log_debug("Executing multicast discovery")

    async def _dns_discovery(self):
        """DNS discovery implementation"""
        # Implementation would query DNS records
        log_debug("Executing DNS discovery")

    def get_recovery_status(self) -> Dict:
        """Get current recovery status"""
        pending_actions = [a for a in self.recovery_actions if not a.success]
        successful_actions = [a for a in self.recovery_actions if a.success]

        return {
            'strategy': self.recovery_strategy.value,
            'pending_actions': len(pending_actions),
            'successful_actions': len(successful_actions),
            'total_actions': len(self.recovery_actions),
            'recent_failures': len([
                a for a in self.recovery_actions
                if not a.success and time.time() - a.created_at < 300
            ]),
            'actions': [
                {
                    'type': a.action_type,
                    'target': a.target_node,
                    'priority': a.priority,
                    'attempts': a.attempts,
                    'max_attempts': a.max_attempts,
                    'created_at': a.created_at
                }
                for a in pending_actions[:10]  # Return first 10
            ]
        }

# Global recovery manager
recovery_manager: Optional[NetworkRecoveryManager] = None

def get_recovery_manager() -> Optional[NetworkRecoveryManager]:
    """Get global recovery manager"""
    return recovery_manager

def create_recovery_manager(discovery: P2PDiscovery, health_monitor: PeerHealthMonitor,
                          partition_manager: NetworkPartitionManager) -> NetworkRecoveryManager:
    """Create and set global recovery manager"""
    global recovery_manager
    recovery_manager = NetworkRecoveryManager(discovery, health_monitor, partition_manager)
    return recovery_manager
EOF

    log_info "Network recovery mechanisms created"
}

# Function to create network tests
create_network_tests() {
    log_info "Creating network test suite..."

    mkdir -p "/opt/aitbc/apps/blockchain-node/tests/network"

    cat > "/opt/aitbc/apps/blockchain-node/tests/network/test_discovery.py" << 'EOF'
"""
Tests for P2P Discovery Service
"""

import pytest
import asyncio
from unittest.mock import Mock, patch

from aitbc_chain.network.discovery import P2PDiscovery, PeerNode, NodeStatus

class TestP2PDiscovery:
    """Test cases for P2P discovery service"""

    def setup_method(self):
        """Setup test environment"""
        self.discovery = P2PDiscovery("test-node", "127.0.0.1", 8000)

        # Add bootstrap nodes
        self.discovery.add_bootstrap_node("127.0.0.1", 8001)
        self.discovery.add_bootstrap_node("127.0.0.1", 8002)

    def test_generate_node_id(self):
        """Test node ID generation"""
        address = "127.0.0.1"
        port = 8000
        public_key = "test_public_key"

        node_id = self.discovery.generate_node_id(address, port, public_key)

        assert isinstance(node_id, str)
        assert len(node_id) == 64  # SHA256 hex length

        # Test consistency
        node_id2 = self.discovery.generate_node_id(address, port, public_key)
        assert node_id == node_id2

    def test_add_bootstrap_node(self):
        """Test adding bootstrap node"""
        initial_count = len(self.discovery.bootstrap_nodes)

        self.discovery.add_bootstrap_node("127.0.0.1", 8003)

        assert len(self.discovery.bootstrap_nodes) == initial_count + 1
        assert ("127.0.0.1", 8003) in self.discovery.bootstrap_nodes

    def test_generate_node_id_consistency(self):
        """Test node ID generation consistency"""
        address = "192.168.1.1"
        port = 9000
        public_key = "test_key"

        node_id1 = self.discovery.generate_node_id(address, port, public_key)
        node_id2 = self.discovery.generate_node_id(address, port, public_key)

        assert node_id1 == node_id2

        # Different inputs should produce different IDs
        node_id3 = self.discovery.generate_node_id("192.168.1.2", port, public_key)
        assert node_id1 != node_id3

    def test_get_peer_count_empty(self):
        """Test getting peer count with no peers"""
        assert self.discovery.get_peer_count() == 0

    def test_get_peer_list_empty(self):
        """Test getting peer list with no peers"""
        assert self.discovery.get_peer_list() == []

    def test_update_peer_reputation_new_peer(self):
        """Test updating reputation for non-existent peer"""
        result = self.discovery.update_peer_reputation("nonexistent", 0.1)
        assert result is False

    def test_update_peer_reputation_bounds(self):
        """Test reputation bounds"""
        # Add a test peer
        peer = PeerNode(
            node_id="test_peer",
            address="127.0.0.1",
            port=8001,
            public_key="test_key",
            last_seen=0,
            status=NodeStatus.ONLINE,
            capabilities=["test"],
            reputation=0.5,
            connection_count=0
        )
        self.discovery.peers["test_peer"] = peer

        # Try to increase beyond 1.0
        result = self.discovery.update_peer_reputation("test_peer", 0.6)
        assert result is True
        assert self.discovery.peers["test_peer"].reputation == 1.0

        # Try to decrease below 0.0
        result = self.discovery.update_peer_reputation("test_peer", -1.5)
        assert result is True
        assert self.discovery.peers["test_peer"].reputation == 0.0

if __name__ == "__main__":
    pytest.main([__file__])
EOF

    log_info "Network test suite created"
}

# Function to setup test network
setup_test_network() {
    log_info "Setting up network infrastructure test environment..."

    # Create test network configuration
    cat > "/opt/aitbc/config/network_test.json" << 'EOF'
{
    "network_name": "network-test",
    "discovery": {
        "bootstrap_nodes": [
            "10.1.223.93:8000",
            "10.1.223.40:8000",
            "10.1.223.93:8001"
        ],
        "discovery_interval": 30,
        "peer_timeout": 300,
        "max_peers": 50
    },
    "health_monitoring": {
        "check_interval": 60,
        "max_latency_ms": 1000,
        "min_availability_percent": 90.0,
        "min_health_score": 0.5,
        "max_consecutive_failures": 3
    },
    "peer_management": {
        "max_connections": 50,
        "min_connections": 8,
        "connection_retry_interval": 300,
        "ban_threshold": 0.1,
        "auto_reconnect": true,
        "auto_ban_malicious": true,
        "load_balance": true
    },
    "topology": {
        "strategy": "hybrid",
        "optimization_interval": 300,
        "max_degree": 8,
        "min_degree": 3
    },
    "partition_handling": {
        "detection_interval": 30,
        "recovery_timeout": 300,
        "max_partition_size": 0.4,
        "min_connected_nodes": 3,
        "partition_detection_threshold": 0.3
    },
    "recovery": {
        "strategy": "adaptive",
        "recovery_interval": 60,
        "max_recovery_attempts": 3,
        "recovery_timeout": 300,
        "emergency_threshold": 0.1
    }
}
EOF

    log_info "Network test configuration created"
}

# Function to run network tests
run_network_tests() {
    log_info "Running network infrastructure tests..."

    cd /opt/aitbc/apps/blockchain-node

    # Install test dependencies if needed
    if ! python -c "import networkx" 2>/dev/null; then
        log_info "Installing networkx..."
        pip install networkx
    fi

    # Run tests
    python -m pytest tests/network/ -v

    if [ $? -eq 0 ]; then
        log_info "All network tests passed!"
    else
        log_error "Some network tests failed!"
        return 1
    fi
}

# Main execution
main() {
    log_info "Starting Phase 2: Network Infrastructure Setup"

    # Create necessary directories
    mkdir -p "$NETWORK_DIR"
    mkdir -p "/opt/aitbc/config"

    # Execute setup steps
    backup_network
    create_p2p_discovery
    create_peer_health_monitoring
    create_dynamic_peer_management
    create_topology_optimization
    create_partition_handling
    create_recovery_mechanisms
    create_network_tests
    setup_test_network

    # Run tests
    if run_network_tests; then
        log_info "Phase 2 network infrastructure setup completed successfully!"
        log_info "Next steps:"
        log_info "1. Configure network parameters"
        log_info "2. Start network services"
        log_info "3. Test peer discovery and health monitoring"
        log_info "4. Proceed to Phase 3: Economic Layer"
    else
        log_error "Phase 2 setup failed - check test output"
        return 1
    fi
}

# Execute main function
main "$@"