feat: implement agent coordination foundation (Week 1)
✅ Multi-Agent Communication Framework - Implemented comprehensive communication protocols - Created hierarchical, P2P, and broadcast protocols - Added message types and routing system - Implemented agent discovery and registration - Created load balancer for task distribution - Built FastAPI application with full API ✅ Core Components Implemented - CommunicationManager: Protocol management - MessageRouter: Advanced message routing - AgentRegistry: Agent discovery and management - LoadBalancer: Intelligent task distribution - TaskDistributor: Priority-based task handling - WebSocketHandler: Real-time communication ✅ API Endpoints - /health: Health check endpoint - /agents/register: Agent registration - /agents/discover: Agent discovery - /tasks/submit: Task submission - /messages/send: Message sending - /load-balancer/stats: Load balancing statistics - /registry/stats: Registry statistics ✅ Production Ready - SystemD service configuration - Docker containerization - Comprehensive test suite - Configuration management - Error handling and logging - Performance monitoring 🚀 Week 1 complete: Agent coordination foundation implemented!
This commit is contained in:
518
apps/agent-coordinator/src/app/main.py
Normal file
518
apps/agent-coordinator/src/app/main.py
Normal file
@@ -0,0 +1,518 @@
|
||||
"""
|
||||
Main FastAPI Application for AITBC Agent Coordinator
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
import uuid
|
||||
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, status
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
import uvicorn
|
||||
|
||||
from .protocols.communication import CommunicationManager, create_protocol, MessageType
|
||||
from .protocols.message_types import MessageProcessor, create_task_message, create_status_message
|
||||
from .routing.agent_discovery import AgentRegistry, AgentDiscoveryService, create_agent_info
|
||||
from .routing.load_balancer import LoadBalancer, TaskDistributor, TaskPriority, LoadBalancingStrategy
|
||||
|
||||
# Configure the root logger: INFO level, timestamped "name - level - message" lines.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Shared service singletons. Each stays None until lifespan() assigns it at
# application startup; request handlers check for None before using them.
agent_registry: Optional[AgentRegistry] = None  # agent records and lookups
discovery_service: Optional[AgentDiscoveryService] = None  # built on top of the registry
load_balancer: Optional[LoadBalancer] = None  # picks agents using the registry
task_distributor: Optional[TaskDistributor] = None  # queues tasks through the load balancer
communication_manager: Optional[CommunicationManager] = None  # sends agent messages
message_processor: Optional[MessageProcessor] = None  # background message processing loop
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage startup and shutdown of the coordinator's shared services.

    Startup builds the agent registry, discovery service, load balancer,
    task distributor, communication manager and message processor, stores
    them in the module-level globals, and launches the distributor and
    message-processor loops as background tasks. Shutdown cancels those
    background tasks and stops the registry.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None, once every service has been initialised.
    """
    global agent_registry, discovery_service, load_balancer
    global task_distributor, communication_manager, message_processor

    logger.info("Starting AITBC Agent Coordinator...")

    # The registry must be running before anything that depends on it.
    agent_registry = AgentRegistry()
    await agent_registry.start()

    discovery_service = AgentDiscoveryService(agent_registry)

    load_balancer = LoadBalancer(agent_registry)
    load_balancer.set_strategy(LoadBalancingStrategy.LEAST_CONNECTIONS)

    task_distributor = TaskDistributor(load_balancer)
    communication_manager = CommunicationManager("agent-coordinator")
    message_processor = MessageProcessor("agent-coordinator")

    # Keep strong references to the background tasks: the event loop only
    # holds weak references, so an unreferenced task may be garbage-collected
    # before it finishes. The references also let shutdown cancel them.
    background_tasks = [
        asyncio.create_task(task_distributor.start_distribution()),
        asyncio.create_task(message_processor.start_processing()),
    ]

    logger.info("Agent Coordinator started successfully")

    try:
        yield
    finally:
        # Run shutdown even if an exception is thrown into the generator.
        logger.info("Shutting down AITBC Agent Coordinator...")

        for task in background_tasks:
            task.cancel()
        # return_exceptions=True collects the CancelledError results instead
        # of letting them abort the rest of the shutdown sequence.
        await asyncio.gather(*background_tasks, return_exceptions=True)

        if agent_registry:
            await agent_registry.stop()

        logger.info("Agent Coordinator shut down")
|
||||
|
||||
# Create FastAPI app; service startup and shutdown are delegated to lifespan().
app = FastAPI(
    title="AITBC Agent Coordinator",
    description="Advanced multi-agent coordination and management system",
    version="1.0.0",
    lifespan=lifespan,
)

# Add CORS middleware: any origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination the FastAPI CORS documentation warns against; confirm whether
# credentialed cross-origin requests are really required here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
# Pydantic models
|
||||
# Request body for POST /agents/register. Only agent_id and agent_type are
# required; every collection field defaults to a fresh empty container.
class AgentRegistrationRequest(BaseModel):
    agent_id: str = Field(..., description="Unique agent identifier")
    agent_type: str = Field(..., description="Type of agent")
    capabilities: List[str] = Field(
        default_factory=list,
        description="Agent capabilities",
    )
    services: List[str] = Field(
        default_factory=list,
        description="Available services",
    )
    endpoints: Dict[str, str] = Field(
        default_factory=dict,
        description="Service endpoints",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )
|
||||
|
||||
# Request body for PUT /agents/{agent_id}/status. The status string is
# converted to AgentStatus by the handler; load_metrics defaults to empty.
class AgentStatusUpdate(BaseModel):
    status: str = Field(..., description="Agent status")
    load_metrics: Dict[str, float] = Field(
        default_factory=dict,
        description="Load metrics",
    )
|
||||
|
||||
# Request body for POST /tasks/submit. The priority string is converted to
# TaskPriority by the handler; requirements may be omitted.
class TaskSubmission(BaseModel):
    task_data: Dict[str, Any] = Field(..., description="Task data")
    priority: str = Field(default="normal", description="Task priority")
    requirements: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Task requirements",
    )
|
||||
|
||||
# Request body for POST /messages/send. message_type and priority arrive as
# strings and are converted to their enum types by the handler.
class MessageRequest(BaseModel):
    receiver_id: str = Field(..., description="Receiver agent ID")
    message_type: str = Field(..., description="Message type")
    payload: Dict[str, Any] = Field(..., description="Message payload")
    priority: str = Field(default="normal", description="Message priority")
|
||||
|
||||
# Health check endpoint
|
||||
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Static liveness payload: it does not inspect any service state, and only
    # the UTC timestamp differs between calls.
    payload = {
        "status": "healthy",
        "service": "agent-coordinator",
        "timestamp": datetime.utcnow().isoformat(),
        "version": "1.0.0",
    }
    return payload
|
||||
|
||||
# Root endpoint
|
||||
@app.get("/")
async def root():
    """Root endpoint with service information"""
    # Hand-maintained list of the routes registered in this module. The last
    # three entries were missing from the original listing even though the
    # routes exist; keep this in sync when adding or removing endpoints.
    available_endpoints = [
        "/health",
        "/agents/register",
        "/agents/discover",
        "/agents/{agent_id}",
        "/agents/{agent_id}/status",
        "/tasks/submit",
        "/tasks/status",
        "/messages/send",
        "/load-balancer/stats",
        "/registry/stats",
        "/agents/service/{service}",
        "/agents/capability/{capability}",
        "/load-balancer/strategy",
    ]
    return {
        "service": "AITBC Agent Coordinator",
        "description": "Advanced multi-agent coordination and management system",
        "version": "1.0.0",
        "endpoints": available_endpoints,
    }
|
||||
|
||||
# Agent registration
|
||||
@app.post("/agents/register")
async def register_agent(request: AgentRegistrationRequest):
    """Register a new agent"""
    # Builds an agent record from the request, attaches the caller-supplied
    # metadata and stores it in the registry.
    # Responses: 200 with a confirmation payload; 503 when the registry has
    # not been initialised; 500 when registration fails or raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        agent_info = create_agent_info(
            agent_id=request.agent_id,
            agent_type=request.agent_type,
            capabilities=request.capabilities,
            services=request.services,
            endpoints=request.endpoints,
        )
        agent_info.metadata = request.metadata

        registered = await agent_registry.register_agent(agent_info)
        if not registered:
            raise HTTPException(status_code=500, detail="Failed to register agent")

        return {
            "status": "success",
            "message": f"Agent {request.agent_id} registered successfully",
            "agent_id": request.agent_id,
            "registered_at": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Deliberate HTTP errors (such as the 503 above) must reach the client
        # unchanged instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error registering agent: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Agent discovery
|
||||
@app.post("/agents/discover")
async def discover_agents(query: Dict[str, Any]):
    """Discover agents based on criteria"""
    # The JSON body is passed to the registry as the query unchanged and is
    # echoed back in the response next to the matching agents.
    # Responses: 200 with the matches; 503 when the registry has not been
    # initialised; 500 when the lookup raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        agents = await agent_registry.discover_agents(query)

        return {
            "status": "success",
            "query": query,
            "agents": [agent.to_dict() for agent in agents],
            "count": len(agents),
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error discovering agents: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Get agent by ID
|
||||
@app.get("/agents/{agent_id}")
async def get_agent(agent_id: str):
    """Get agent information by ID"""
    # Responses: 200 with the agent's dict form; 404 when the registry has no
    # such agent; 503 when the registry has not been initialised; 500 when
    # the lookup raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        found = await agent_registry.get_agent_by_id(agent_id)
        if found:
            return {
                "status": "success",
                "agent": found.to_dict(),
                "timestamp": datetime.utcnow().isoformat(),
            }

        raise HTTPException(status_code=404, detail="Agent not found")

    except HTTPException:
        # Pass the deliberate 404/503 through untouched.
        raise
    except Exception as e:
        logger.error(f"Error getting agent: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# Update agent status
|
||||
@app.put("/agents/{agent_id}/status")
async def update_agent_status(agent_id: str, request: AgentStatusUpdate):
    """Update agent status"""
    # Converts the status string to AgentStatus and forwards it, together with
    # the load metrics, to the registry.
    # Responses: 200 on success; 400 for an unrecognised status value; 503
    # when the registry has not been initialised; 500 when the update fails
    # or raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        from .routing.agent_discovery import AgentStatus

        # Presumably AgentStatus is an Enum, so an unknown value raises
        # ValueError (the pattern used for the other enums in this module);
        # report that as a client error rather than a server fault.
        try:
            new_status = AgentStatus(request.status)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid status: {request.status}")

        updated = await agent_registry.update_agent_status(
            agent_id,
            new_status,
            request.load_metrics,
        )
        if not updated:
            raise HTTPException(status_code=500, detail="Failed to update agent status")

        return {
            "status": "success",
            "message": f"Agent {agent_id} status updated",
            "agent_id": agent_id,
            "new_status": request.status,
            "updated_at": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended 400/503/500 instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error updating agent status: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Submit task
|
||||
@app.post("/tasks/submit")
async def submit_task(request: TaskSubmission, background_tasks: BackgroundTasks):
    """Submit a task for distribution"""
    # Converts the priority string to TaskPriority and hands the task to the
    # distributor. background_tasks is accepted but not used.
    # Responses: 200 with the task ID; 400 for an unknown priority; 503 when
    # the distributor has not been initialised; 500 when submission raises.
    try:
        if not task_distributor:
            raise HTTPException(status_code=503, detail="Task distributor not available")

        try:
            priority = TaskPriority(request.priority.lower())
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid priority: {request.priority}")

        # Store the ID on the task itself before submitting. Previously a
        # missing ID was generated only for the response, so the value
        # returned to the caller did not identify the submitted task.
        task_id = request.task_data.setdefault("task_id", str(uuid.uuid4()))

        await task_distributor.submit_task(
            request.task_data,
            priority,
            request.requirements,
        )

        return {
            "status": "success",
            "message": "Task submitted successfully",
            "task_id": task_id,
            "priority": request.priority,
            "submitted_at": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended 400/503 instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error submitting task: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Get task status
|
||||
@app.get("/tasks/status")
async def get_task_status():
    """Get task distribution statistics"""
    # Returns whatever the distributor reports from get_distribution_stats().
    # Responses: 200 with the stats; 503 when the distributor has not been
    # initialised; 500 when collecting the stats raises.
    try:
        if not task_distributor:
            raise HTTPException(status_code=503, detail="Task distributor not available")

        stats = task_distributor.get_distribution_stats()

        return {
            "status": "success",
            "stats": stats,
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting task status: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Send message
|
||||
@app.post("/messages/send")
async def send_message(request: MessageRequest):
    """Send message to agent"""
    # Builds an AgentMessage with the coordinator as sender and sends it
    # through the "hierarchical" protocol.
    # Responses: 200 with the message ID; 400 for an unknown message type or
    # priority; 503 when the communication manager has not been initialised;
    # 500 when sending fails or raises.
    try:
        if not communication_manager:
            raise HTTPException(status_code=503, detail="Communication manager not available")

        from .protocols.communication import AgentMessage, Priority

        try:
            message_type = MessageType(request.message_type)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid message type: {request.message_type}")

        try:
            priority = Priority(request.priority.lower())
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid priority: {request.priority}")

        message = AgentMessage(
            sender_id="agent-coordinator",
            receiver_id=request.receiver_id,
            message_type=message_type,
            priority=priority,
            payload=request.payload,
        )

        sent = await communication_manager.send_message("hierarchical", message)
        if not sent:
            raise HTTPException(status_code=500, detail="Failed to send message")

        return {
            "status": "success",
            "message": "Message sent successfully",
            "message_id": message.id,
            "receiver_id": request.receiver_id,
            "sent_at": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended 400/503/500 instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error sending message: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Load balancer statistics
|
||||
@app.get("/load-balancer/stats")
async def get_load_balancer_stats():
    """Get load balancer statistics"""
    # Returns whatever the load balancer reports from
    # get_load_balancing_stats().
    # Responses: 200 with the stats; 503 when the load balancer has not been
    # initialised; 500 when collecting the stats raises.
    try:
        if not load_balancer:
            raise HTTPException(status_code=503, detail="Load balancer not available")

        stats = load_balancer.get_load_balancing_stats()

        return {
            "status": "success",
            "stats": stats,
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting load balancer stats: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Registry statistics
|
||||
@app.get("/registry/stats")
async def get_registry_stats():
    """Get agent registry statistics"""
    # Returns whatever the registry reports from get_registry_stats().
    # Responses: 200 with the stats; 503 when the registry has not been
    # initialised; 500 when collecting the stats raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        stats = await agent_registry.get_registry_stats()

        return {
            "status": "success",
            "stats": stats,
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting registry stats: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Get agents by service
|
||||
@app.get("/agents/service/{service}")
async def get_agents_by_service(service: str):
    """Get agents that provide a specific service"""
    # Responses: 200 with the matching agents (possibly an empty list); 503
    # when the registry has not been initialised; 500 when the lookup raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        agents = await agent_registry.get_agents_by_service(service)

        return {
            "status": "success",
            "service": service,
            "agents": [agent.to_dict() for agent in agents],
            "count": len(agents),
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting agents by service: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Get agents by capability
|
||||
@app.get("/agents/capability/{capability}")
async def get_agents_by_capability(capability: str):
    """Get agents that have a specific capability"""
    # Responses: 200 with the matching agents (possibly an empty list); 503
    # when the registry has not been initialised; 500 when the lookup raises.
    try:
        if not agent_registry:
            raise HTTPException(status_code=503, detail="Agent registry not available")

        agents = await agent_registry.get_agents_by_capability(capability)

        return {
            "status": "success",
            "capability": capability,
            "agents": [agent.to_dict() for agent in agents],
            "count": len(agents),
            "timestamp": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended status code (503) instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting agents by capability: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Set load balancing strategy
|
||||
@app.put("/load-balancer/strategy")
async def set_load_balancing_strategy(strategy: str):
    """Set load balancing strategy"""
    # The strategy name is lower-cased before conversion to
    # LoadBalancingStrategy; the response echoes the value as it was sent.
    # Responses: 200 on success; 400 for an unknown strategy; 503 when the
    # load balancer has not been initialised; 500 when the change raises.
    try:
        if not load_balancer:
            raise HTTPException(status_code=503, detail="Load balancer not available")

        try:
            load_balancing_strategy = LoadBalancingStrategy(strategy.lower())
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid strategy: {strategy}")

        load_balancer.set_strategy(load_balancing_strategy)

        return {
            "status": "success",
            "message": f"Load balancing strategy set to {strategy}",
            "strategy": strategy,
            "updated_at": datetime.utcnow().isoformat(),
        }

    except HTTPException:
        # Keep the intended 400/503 instead of rewrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error setting load balancing strategy: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
# Error handlers
|
||||
@app.exception_handler(404)
async def not_found_handler(request, exc):
    """Return the service's JSON error body for 404 responses.

    Neither ``request`` nor ``exc`` is inspected, so the message is always
    the generic "Resource not found".
    """
    body = {
        "status": "error",
        "message": "Resource not found",
        "timestamp": datetime.utcnow().isoformat(),
    }
    return JSONResponse(status_code=404, content=body)
|
||||
|
||||
@app.exception_handler(500)
async def internal_error_handler(request, exc):
    """Log the error and return the service's JSON error body with status 500.

    The exception text goes to the log only; the response carries a generic
    message.
    """
    logger.error(f"Internal server error: {exc}")
    body = {
        "status": "error",
        "message": "Internal server error",
        "timestamp": datetime.utcnow().isoformat(),
    }
    return JSONResponse(status_code=500, content=body)
|
||||
|
||||
# Main function
|
||||
def main():
    """Run the coordinator under uvicorn on 0.0.0.0:9001 with auto-reload.

    uvicorn needs an import string (not the app object) when ``reload`` is
    enabled, and this module uses package-relative imports, so it can only be
    imported by its package-qualified name. That name is taken from
    ``__spec__``, which is set both when the module is imported normally and
    when it is started with ``python -m``. The original ``"main:app"`` string
    is kept as the fallback when no spec is available.
    """
    module_name = __spec__.name if __spec__ is not None else "main"

    # NOTE(review): reload=True looks like a development setting; confirm
    # before using this entry point in production.
    uvicorn.run(
        f"{module_name}:app",
        host="0.0.0.0",
        port=9001,
        reload=True,
        log_level="info",
    )


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user