diff --git a/.gitea/workflows/build-miner-binary.yml b/.gitea/workflows/build-miner-binary.yml index c179d75d..3700078e 100644 --- a/.gitea/workflows/build-miner-binary.yml +++ b/.gitea/workflows/build-miner-binary.yml @@ -77,11 +77,54 @@ jobs: echo "version=$VERSION" >> $GITHUB_OUTPUT echo "version=$VERSION" - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - name: miner-binary - path: | - ${{ env.WORKSPACE }}/repo/scripts/gpu/aitbc-miner-debian - ${{ env.WORKSPACE }}/repo/scripts/gpu/aitbc-miner-debian-package.tar.gz - ${{ env.WORKSPACE }}/repo/scripts/gpu/SHA256SUMS + - name: Create Gitea release + run: | + cd "${{ env.WORKSPACE }}/repo" + VERSION=${GITHUB_REF#refs/tags/v} + + # Create release using Gitea API + curl -X POST \ + -H "Authorization: token ${{ secrets.GITEA_TOKEN }}" \ + -H "Content-Type: application/json" \ + http://gitea.bubuit.net:3000/api/v1/repos/oib/aitbc/releases \ + -d "{ + \"tag_name\": \"v${VERSION}\", + \"target_commitish\": \"main\", + \"name\": \"AITBC Miner v${VERSION}\", + \"body\": \"AITBC Miner for Debian Stable (trixie)\\n\\n## Changes\\n- See commit history for details\", + \"draft\": false, + \"prerelease\": false + }" + + - name: Upload binary to Gitea release + run: | + cd "${{ env.WORKSPACE }}/repo" + VERSION=${GITHUB_REF#refs/tags/v} + + # Upload binary + curl -X POST \ + -H "Authorization: token ${{ secrets.GITEA_TOKEN }}" \ + -F "attachment=@scripts/gpu/aitbc-miner-debian" \ + http://gitea.bubuit.net:3000/api/v1/repos/oib/aitbc/releases/v${VERSION}/assets + + - name: Upload package to Gitea release + run: | + cd "${{ env.WORKSPACE }}/repo" + VERSION=${GITHUB_REF#refs/tags/v} + + # Upload package + curl -X POST \ + -H "Authorization: token ${{ secrets.GITEA_TOKEN }}" \ + -F "attachment=@scripts/gpu/aitbc-miner-debian-package.tar.gz" \ + http://gitea.bubuit.net:3000/api/v1/repos/oib/aitbc/releases/v${VERSION}/assets + + - name: Upload checksums to Gitea release + run: | + cd "${{ env.WORKSPACE }}/repo" + VERSION=${GITHUB_REF#refs/tags/v} + + # Upload checksums + curl -X POST \ + -H "Authorization: token ${{ secrets.GITEA_TOKEN }}" \ + -F "attachment=@scripts/gpu/SHA256SUMS" \ + http://gitea.bubuit.net:3000/api/v1/repos/oib/aitbc/releases/v${VERSION}/assets diff --git a/aitbc/aitbc_logging.py b/aitbc/aitbc_logging.py index ee6c9c9f..ad03a0a6 100644 --- a/aitbc/aitbc_logging.py +++ b/aitbc/aitbc_logging.py @@ -5,23 +5,56 @@ Centralized logging utilities for the AITBC project import logging import sys -from typing import Optional +import json +from typing import Optional, Dict, Any +from datetime import datetime +from contextlib import contextmanager + +class StructuredFormatter(logging.Formatter): + """Structured JSON formatter for log aggregation""" + + def format(self, record: logging.LogRecord) -> str: + """Format log record as structured JSON""" + log_entry = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + "module": record.module, + "function": record.funcName, + "line": record.lineno, + } + + # Add extra fields if present + if hasattr(record, 'extra'): + log_entry.update(record.extra) + + # Add exception info if present + if record.exc_info: + log_entry["exception"] = self.formatException(record.exc_info) + + return json.dumps(log_entry) def setup_logger( name: str, level: str = "INFO", - format_string: Optional[str] = None + format_string: Optional[str] = None, + structured: bool = False ) -> logging.Logger: """Setup a logger with consistent formatting""" - if format_string is None: - format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - logger = logging.getLogger(name) logger.setLevel(getattr(logging, level.upper())) if not logger.handlers: handler = logging.StreamHandler(sys.stdout) - formatter = logging.Formatter(format_string) + + if structured: + formatter = StructuredFormatter() + else: + if format_string is None: + format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + formatter = logging.Formatter(format_string) + handler.setFormatter(formatter) logger.addHandler(handler) @@ -31,6 +64,48 @@ def get_logger(name: str) -> logging.Logger: """Get a logger instance""" return logging.getLogger(name) -def configure_logging(level: str = "INFO") -> None: +def configure_logging(level: str = "INFO", structured: bool = False) -> None: """Configure root logging level""" - logging.basicConfig(level=getattr(logging, level.upper())) + root_logger = logging.getLogger() + root_logger.setLevel(getattr(logging, level.upper())) + + if structured: + # Remove existing handlers + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Add structured handler + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(StructuredFormatter()) + root_logger.addHandler(handler) + +@contextmanager +def log_context(**kwargs): + """Context manager for adding contextual information to logs""" + logger = logging.getLogger() + extra = {'extra': kwargs} + + class ContextFilter(logging.Filter): + def filter(self, record): + for key, value in kwargs.items(): + setattr(record, key, value) + return True + + context_filter = ContextFilter() + logger.addFilter(context_filter) + try: + yield + finally: + logger.removeFilter(context_filter) + +class LogContext: + """Class for adding contextual information to logs across multiple calls""" + + def __init__(self, **kwargs): + self.context = kwargs + + def __enter__(self): + return log_context(**self.context).__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + pass diff --git a/aitbc/alerting.py b/aitbc/alerting.py new file mode 100644 index 00000000..ac5ad449 --- /dev/null +++ b/aitbc/alerting.py @@ -0,0 +1,414 @@ +""" +AITBC Alerting Module +Alerting and notification system for AITBC applications +""" + +import asyncio +from typing import Callable, Dict, Any, List, Optional +from datetime import datetime, timedelta +from dataclasses import dataclass, field +from enum import Enum +import json + +from .aitbc_logging import get_logger + +logger = get_logger(__name__) + + +class AlertSeverity(Enum): + """Alert severity levels""" + INFO = "info" + WARNING = "warning" + ERROR = "error" + CRITICAL = "critical" + + +class AlertStatus(Enum): + """Alert status""" + ACTIVE = "active" + ACKNOWLEDGED = "acknowledged" + RESOLVED = "resolved" + + +@dataclass +class Alert: + """Alert data structure""" + id: str + severity: AlertSeverity + title: str + message: str + source: str + timestamp: datetime = field(default_factory=datetime.utcnow) + status: AlertStatus = AlertStatus.ACTIVE + metadata: Dict[str, Any] = field(default_factory=dict) + acknowledged_by: Optional[str] = None + acknowledged_at: Optional[datetime] = None + resolved_at: Optional[datetime] = None + + def to_dict(self) -> Dict[str, Any]: + """Convert alert to dictionary""" + return { + "id": self.id, + "severity": self.severity.value, + "title": self.title, + "message": self.message, + "source": self.source, + "timestamp": self.timestamp.isoformat(), + "status": self.status.value, + "metadata": self.metadata, + "acknowledged_by": self.acknowledged_by, + "acknowledged_at": self.acknowledged_at.isoformat() if self.acknowledged_at else None, + "resolved_at": self.resolved_at.isoformat() if self.resolved_at else None, + } + + +class AlertChannel: + """Base class for alert channels""" + + async def send(self, alert: Alert) -> bool: + """ + Send alert through this channel + + Args: + alert: Alert to send + + Returns: + True if sent successfully, False otherwise + """ + raise NotImplementedError + + +class LogAlertChannel(AlertChannel): + """Log-based alert channel""" + + async def send(self, alert: Alert) -> bool: + """Send alert to logs""" + try: + log_level = { + AlertSeverity.INFO: logger.info, + AlertSeverity.WARNING: logger.warning, + AlertSeverity.ERROR: logger.error, + AlertSeverity.CRITICAL: logger.critical, + }.get(alert.severity, logger.info) + + log_level( + f"Alert [{alert.severity.value.upper()}]: {alert.title}", + extra={ + "alert_id": alert.id, + "severity": alert.severity.value, + "source": alert.source, + "metadata": alert.metadata, + } + ) + return True + except Exception as e: + logger.error(f"Failed to send log alert: {e}") + return False + + +class WebhookAlertChannel(AlertChannel): + """Webhook-based alert channel""" + + def __init__(self, url: str, headers: Optional[Dict[str, str]] = None): + """ + Initialize webhook channel + + Args: + url: Webhook URL + headers: HTTP headers + """ + self.url = url + self.headers = headers or {} + + async def send(self, alert: Alert) -> bool: + """Send alert via webhook""" + try: + import httpx + + async with httpx.AsyncClient() as client: + response = await client.post( + self.url, + json=alert.to_dict(), + headers=self.headers, + timeout=10.0 + ) + response.raise_for_status() + return True + except Exception as e: + logger.error(f"Failed to send webhook alert: {e}") + return False + + +class AlertRule: + """Alert rule definition""" + + def __init__( + self, + name: str, + condition: Callable[[], bool], + severity: AlertSeverity, + title_template: str, + message_template: str, + source: str, + check_interval: int = 60, + cooldown: int = 300, + metadata: Optional[Dict[str, Any]] = None + ): + """ + Initialize alert rule + + Args: + name: Rule name + condition: Function that returns True if alert should fire + severity: Alert severity + title_template: Template for alert title + message_template: Template for alert message + source: Alert source + check_interval: Check interval in seconds + cooldown: Cooldown period in seconds + metadata: Additional metadata + """ + self.name = name + self.condition = condition + self.severity = severity + self.title_template = title_template + self.message_template = message_template + self.source = source + self.check_interval = check_interval + self.cooldown = cooldown + self.metadata = metadata or {} + self.last_fired: Optional[datetime] = None + self.enabled = True + + def should_fire(self) -> bool: + """Check if alert should fire""" + if not self.enabled: + return False + + if self.last_fired: + time_since_last = (datetime.utcnow() - self.last_fired).total_seconds() + if time_since_last < self.cooldown: + return False + + return self.condition() + + def fire(self) -> Alert: + """Create alert from this rule""" + self.last_fired = datetime.utcnow() + return Alert( + id=f"{self.name}-{int(datetime.utcnow().timestamp())}", + severity=self.severity, + title=self.title_template, + message=self.message_template, + source=self.source, + metadata=self.metadata + ) + + +class AlertManager: + """Alert manager for handling alerts and rules""" + + def __init__(self): + """Initialize alert manager""" + self.rules: Dict[str, AlertRule] = {} + self.channels: List[AlertChannel] = [] + self.active_alerts: Dict[str, Alert] = {} + self.alert_history: List[Alert] = [] + self._running = False + self._task: Optional[asyncio.Task] = None + + def add_rule(self, rule: AlertRule) -> None: + """ + Add alert rule + + Args: + rule: Alert rule to add + """ + self.rules[rule.name] = rule + logger.info(f"Added alert rule: {rule.name}") + + def remove_rule(self, name: str) -> None: + """ + Remove alert rule + + Args: + name: Rule name + """ + if name in self.rules: + del self.rules[name] + logger.info(f"Removed alert rule: {name}") + + def add_channel(self, channel: AlertChannel) -> None: + """ + Add alert channel + + Args: + channel: Alert channel to add + """ + self.channels.append(channel) + logger.info(f"Added alert channel: {channel.__class__.__name__}") + + async def check_rules(self) -> None: + """Check all alert rules and fire if needed""" + for rule in self.rules.values(): + try: + if rule.should_fire(): + alert = rule.fire() + await self.send_alert(alert) + except Exception as e: + logger.error(f"Error checking rule {rule.name}: {e}") + + async def send_alert(self, alert: Alert) -> None: + """ + Send alert through all channels + + Args: + alert: Alert to send + """ + self.active_alerts[alert.id] = alert + self.alert_history.append(alert) + + # Keep history limited + if len(self.alert_history) > 1000: + self.alert_history = self.alert_history[-1000:] + + # Send through all channels + for channel in self.channels: + try: + await channel.send(alert) + except Exception as e: + logger.error(f"Failed to send alert through channel: {e}") + + async def acknowledge_alert(self, alert_id: str, acknowledged_by: str) -> bool: + """ + Acknowledge an alert + + Args: + alert_id: Alert ID + acknowledged_by: User acknowledging the alert + + Returns: + True if acknowledged successfully + """ + if alert_id in self.active_alerts: + alert = self.active_alerts[alert_id] + alert.status = AlertStatus.ACKNOWLEDGED + alert.acknowledged_by = acknowledged_by + alert.acknowledged_at = datetime.utcnow() + logger.info(f"Alert acknowledged: {alert_id} by {acknowledged_by}") + return True + return False + + async def resolve_alert(self, alert_id: str) -> bool: + """ + Resolve an alert + + Args: + alert_id: Alert ID + + Returns: + True if resolved successfully + """ + if alert_id in self.active_alerts: + alert = self.active_alerts[alert_id] + alert.status = AlertStatus.RESOLVED + alert.resolved_at = datetime.utcnow() + del self.active_alerts[alert_id] + logger.info(f"Alert resolved: {alert_id}") + return True + return False + + def get_active_alerts(self) -> List[Alert]: + """Get all active alerts""" + return list(self.active_alerts.values()) + + def get_alert_history(self, limit: int = 100) -> List[Alert]: + """ + Get alert history + + Args: + limit: Maximum number of alerts to return + + Returns: + List of alerts + """ + return self.alert_history[-limit:] + + async def start(self) -> None: + """Start alert manager background task""" + if self._running: + return + + self._running = True + self._task = asyncio.create_task(self._run_checks()) + logger.info("Alert manager started") + + async def stop(self) -> None: + """Stop alert manager background task""" + if not self._running: + return + + self._running = False + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + logger.info("Alert manager stopped") + + async def _run_checks(self) -> None: + """Background task to check alert rules""" + while self._running: + try: + await self.check_rules() + + # Calculate sleep time based on minimum check interval + min_interval = min((rule.check_interval for rule in self.rules.values()), default=60) + await asyncio.sleep(min_interval) + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"Error in alert check loop: {e}") + await asyncio.sleep(60) + + +# Global alert manager instance +_alert_manager: Optional[AlertManager] = None + + +def get_alert_manager() -> AlertManager: + """ + Get global alert manager instance + + Returns: + Alert manager instance + """ + global _alert_manager + if _alert_manager is None: + _alert_manager = AlertManager() + # Add default log channel + _alert_manager.add_channel(LogAlertChannel()) + return _alert_manager + + +def setup_alerting( + webhook_url: Optional[str] = None, + webhook_headers: Optional[Dict[str, str]] = None +) -> AlertManager: + """ + Setup alerting system + + Args: + webhook_url: Optional webhook URL for alerts + webhook_headers: Optional webhook headers + + Returns: + Alert manager instance + """ + manager = get_alert_manager() + + if webhook_url: + manager.add_channel(WebhookAlertChannel(webhook_url, webhook_headers)) + + return manager diff --git a/aitbc/tracing.py b/aitbc/tracing.py new file mode 100644 index 00000000..521cf501 --- /dev/null +++ b/aitbc/tracing.py @@ -0,0 +1,294 @@ +""" +AITBC Distributed Tracing Module +OpenTelemetry-based distributed tracing for AITBC applications +""" + +from typing import Optional, Dict, Any, Callable +from functools import wraps +from contextlib import contextmanager +import os + +# OpenTelemetry imports (optional - gracefully handle if not installed) +try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + from opentelemetry.sdk.resources import Resource, SERVICE_NAME + from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor + from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor + from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor + from opentelemetry.trace import Status, StatusCode + OPENTELEMETRY_AVAILABLE = True +except ImportError: + OPENTELEMETRY_AVAILABLE = False + +# Global tracer instance +_tracer: Optional[object] = None +_tracer_provider: Optional[object] = None + + +def setup_tracing( + service_name: str, + service_version: str = "1.0.0", + exporter: str = "console", + sample_rate: float = 1.0 +) -> None: + """ + Setup OpenTelemetry tracing for the service + + Args: + service_name: Name of the service + service_version: Version of the service + exporter: Exporter type ('console', 'otlp', 'none') + sample_rate: Sampling rate (0.0 to 1.0) + """ + global _tracer, _tracer_provider + + if not OPENTELEMETRY_AVAILABLE: + print("OpenTelemetry not available, tracing disabled") + return + + # Create resource with service information + resource = Resource.create({ + SERVICE_NAME: service_name, + "service.version": service_version, + "deployment.environment": os.getenv("APP_ENV", "development") + }) + + # Create tracer provider + _tracer_provider = TracerProvider(resource=resource) + + # Configure exporter based on type + if exporter == "console": + span_processor = BatchSpanProcessor(ConsoleSpanExporter()) + _tracer_provider.add_span_processor(span_processor) + elif exporter == "otlp": + try: + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317") + span_processor = BatchSpanProcessor(OTLPSpanExporter(endpoint=otlp_endpoint)) + _tracer_provider.add_span_processor(span_processor) + except ImportError: + print("OTLP exporter not available, falling back to console") + span_processor = BatchSpanProcessor(ConsoleSpanExporter()) + _tracer_provider.add_span_processor(span_processor) + + # Set global tracer provider + trace.set_tracer_provider(_tracer_provider) + + # Get tracer + _tracer = trace.get_tracer(__name__) + + print(f"Tracing enabled for {service_name} with {exporter} exporter") + + +def get_tracer() -> Optional[object]: + """ + Get the global tracer instance + + Returns: + Tracer instance or None if not configured + """ + return _tracer + + +def instrument_fastapi(app) -> None: + """ + Instrument FastAPI application with tracing + + Args: + app: FastAPI application instance + """ + if not OPENTELEMETRY_AVAILABLE: + print("OpenTelemetry not available, FastAPI instrumentation disabled") + return + + try: + FastAPIInstrumentor.instrument_app(app) + print("FastAPI instrumentation enabled") + except Exception as e: + print(f"Failed to instrument FastAPI: {e}") + + +def instrument_httpx() -> None: + """Instrument HTTPX client with tracing""" + if not OPENTELEMETRY_AVAILABLE: + print("OpenTelemetry not available, HTTPX instrumentation disabled") + return + + try: + HTTPXClientInstrumentor().instrument() + print("HTTPX instrumentation enabled") + except Exception as e: + print(f"Failed to instrument HTTPX: {e}") + + +def instrument_sqlalchemy(engine) -> None: + """ + Instrument SQLAlchemy engine with tracing + + Args: + engine: SQLAlchemy engine instance + """ + if not OPENTELEMETRY_AVAILABLE: + print("OpenTelemetry not available, SQLAlchemy instrumentation disabled") + return + + try: + SQLAlchemyInstrumentor().instrument(engine=engine) + print("SQLAlchemy instrumentation enabled") + except Exception as e: + print(f"Failed to instrument SQLAlchemy: {e}") + + +@contextmanager +def trace_span( + name: str, + attributes: Optional[Dict[str, Any]] = None +): + """ + Context manager for creating a trace span + + Args: + name: Span name + attributes: Span attributes + + Yields: + Span object if tracing is available + """ + if not OPENTELEMETRY_AVAILABLE or _tracer is None: + yield None + return + + with _tracer.start_as_current_span(name, attributes=attributes or {}) as span: + yield span + + +def trace_function(name: Optional[str] = None): + """ + Decorator for tracing function execution + + Args: + name: Span name (defaults to function name) + + Returns: + Decorated function + """ + def decorator(func: Callable) -> Callable: + if not OPENTELEMETRY_AVAILABLE or _tracer is None: + return func + + span_name = name or f"{func.__module__}.{func.__name__}" + + @wraps(func) + def wrapper(*args, **kwargs): + with _tracer.start_as_current_span(span_name) as span: + # Add function arguments as attributes (if small) + try: + if args and len(args) < 3: + span.set_attribute("args", str(args)) + if kwargs and len(kwargs) < 3: + span.set_attribute("kwargs", str(kwargs)) + except Exception: + pass + + try: + result = func(*args, **kwargs) + span.set_status(Status(StatusCode.OK)) + return result + except Exception as e: + span.set_status(Status(StatusCode.ERROR, str(e))) + span.record_exception(e) + raise + + return wrapper + return decorator + + +def trace_async_function(name: Optional[str] = None): + """ + Decorator for tracing async function execution + + Args: + name: Span name (defaults to function name) + + Returns: + Decorated async function + """ + def decorator(func: Callable) -> Callable: + if not OPENTELEMETRY_AVAILABLE or _tracer is None: + return func + + span_name = name or f"{func.__module__}.{func.__name__}" + + @wraps(func) + async def wrapper(*args, **kwargs): + with _tracer.start_as_current_span(span_name) as span: + # Add function arguments as attributes (if small) + try: + if args and len(args) < 3: + span.set_attribute("args", str(args)) + if kwargs and len(kwargs) < 3: + span.set_attribute("kwargs", str(kwargs)) + except Exception: + pass + + try: + result = await func(*args, **kwargs) + span.set_status(Status(StatusCode.OK)) + return result + except Exception as e: + span.set_status(Status(StatusCode.ERROR, str(e))) + span.record_exception(e) + raise + + return wrapper + return decorator + + +def set_span_attribute(key: str, value: Any) -> None: + """ + Set an attribute on the current span + + Args: + key: Attribute key + value: Attribute value + """ + if not OPENTELEMETRY_AVAILABLE: + return + + current_span = trace.get_current_span() + if current_span: + current_span.set_attribute(key, str(value)) + + +def set_span_error(exception: Exception) -> None: + """ + Record an exception on the current span + + Args: + exception: Exception to record + """ + if not OPENTELEMETRY_AVAILABLE: + return + + current_span = trace.get_current_span() + if current_span: + current_span.set_status(Status(StatusCode.ERROR, str(exception))) + current_span.record_exception(exception) + + +def add_span_event(name: str, attributes: Optional[Dict[str, Any]] = None) -> None: + """ + Add an event to the current span + + Args: + name: Event name + attributes: Event attributes + """ + if not OPENTELEMETRY_AVAILABLE: + return + + current_span = trace.get_current_span() + if current_span: + current_span.add_event(name, attributes or {}) diff --git a/apps/coordinator-api/src/app/routers/adaptive_learning_health.py b/apps/coordinator-api/src/app/routers/adaptive_learning_health.py index d55e8544..24b483ce 100755 --- a/apps/coordinator-api/src/app/routers/adaptive_learning_health.py +++ b/apps/coordinator-api/src/app/routers/adaptive_learning_health.py @@ -10,9 +10,11 @@ from datetime import datetime, timezone from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from aitbc import get_logger from ..services.ai_analytics.adaptive_learning import AdaptiveLearningService from ..storage import get_session @@ -23,7 +25,8 @@ router = APIRouter() @router.get("/health", tags=["health"], summary="Adaptive Learning Service Health") -async def adaptive_learning_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def adaptive_learning_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Health check for Adaptive Learning Service (Port 8011) """ @@ -103,7 +106,8 @@ async def adaptive_learning_health(session: Annotated[Session, Depends(get_sessi @router.get("/health/deep", tags=["health"], summary="Deep Adaptive Learning Service Health") -async def adaptive_learning_deep_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def adaptive_learning_deep_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Deep health check with learning framework validation """ diff --git a/apps/coordinator-api/src/app/routers/admin.py b/apps/coordinator-api/src/app/routers/admin.py index e43819ac..9f314b27 100755 --- a/apps/coordinator-api/src/app/routers/admin.py +++ b/apps/coordinator-api/src/app/routers/admin.py @@ -4,12 +4,11 @@ from datetime import datetime, timezone from typing import Annotated from fastapi import APIRouter, Depends, Header, HTTPException, Request -from slowapi import Limiter -from slowapi.util import get_remote_address from sqlalchemy.orm import Session from sqlmodel import select from aitbc import get_logger +from aitbc.rate_limiting import rate_limit from ..config import settings from ..deps import require_admin_key @@ -19,12 +18,12 @@ from ..utils.cache import cached, get_cache_config logger = get_logger(__name__) -limiter = Limiter(key_func=get_remote_address) router = APIRouter(prefix="/admin", tags=["admin"]) @router.get("/debug-settings", summary="Debug settings") -async def debug_settings() -> dict: # type: ignore[arg-type] +@rate_limit(rate=100, per=60) +async def debug_settings(request: Request) -> dict: # type: ignore[arg-type] # SECURITY FIX: Mask API keys before returning to prevent clear-text exposure def mask_keys(keys: list[str]) -> list[str]: return [key[:8] + "..." if len(key) > 8 else "***" for key in keys] @@ -38,8 +37,9 @@ async def debug_settings() -> dict: # type: ignore[arg-type] @router.post("/debug/create-test-miner", summary="Create a test miner for debugging") +@rate_limit(rate=10, per=60) async def create_test_miner( - session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key()) + request: Request, session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key()) ) -> dict[str, str]: # type: ignore[arg-type] """Create a test miner for debugging marketplace sync""" try: @@ -101,7 +101,8 @@ async def create_test_miner( @router.get("/test-key", summary="Test API key validation") -async def test_key(api_key: str = Header(default=None, alias="X-Api-Key")) -> dict[str, str]: # type: ignore[arg-type] +@rate_limit(rate=100, per=60) +async def test_key(request: Request, api_key: str = Header(default=None, alias="X-Api-Key")) -> dict[str, str]: # type: ignore[arg-type] masked_key = api_key[:8] + "..." if api_key else "None" logger.debug(f"Received API key: {masked_key}") logger.debug(f"Allowed admin keys count: {len(settings.admin_api_keys)}") @@ -115,7 +116,7 @@ async def test_key(api_key: str = Header(default=None, alias="X-Api-Key")) -> di @router.get("/stats", summary="Get coordinator stats") -@limiter.limit(lambda: settings.rate_limit_admin_stats) +@rate_limit(rate=100, per=60) @cached(**get_cache_config("job_list")) # Cache admin stats for 1 minute async def get_stats( request: Request, session: Annotated[Session, Depends(get_session)], api_key: str = Header(default=None, alias="X-Api-Key") @@ -151,7 +152,8 @@ async def get_stats( @router.get("/jobs", summary="List jobs") -async def list_jobs(session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]: # type: ignore[arg-type] +@rate_limit(rate=100, per=60) +async def list_jobs(request: Request, session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]: # type: ignore[arg-type] from ..domain import Job jobs = session.execute(select(Job).order_by(Job.requested_at.desc()).limit(100)).all() @@ -170,7 +172,8 @@ async def list_jobs(session: Annotated[Session, Depends(get_session)], admin_key @router.get("/miners", summary="List miners") -async def list_miners(session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]: # type: ignore[arg-type] +@rate_limit(rate=100, per=60) +async def list_miners(request: Request, session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]: # type: ignore[arg-type] from sqlmodel import select from ..domain import Miner @@ -195,6 +198,7 @@ async def list_miners(session: Annotated[Session, Depends(get_session)], admin_k @router.get("/status", summary="Get system status", response_model=None) +@rate_limit(rate=100, per=60) async def get_system_status( request: Request, session: Annotated[Session, Depends(get_session)], admin_key: str = Depends(require_admin_key()) ) -> dict[str, any]: # type: ignore[arg-type] @@ -263,7 +267,8 @@ async def get_system_status( # Agent endpoints temporarily added to admin router @router.post("/agents/networks", response_model=dict, status_code=201) -async def create_agent_network(network_data: dict) -> dict: +@rate_limit(rate=20, per=60) +async def create_agent_network(request: Request, network_data: dict) -> dict: """Create a new agent network for collaborative processing""" try: @@ -300,7 +305,8 @@ async def create_agent_network(network_data: dict) -> dict: @router.get("/agents/executions/{execution_id}/receipt") -async def get_execution_receipt(execution_id: str) -> dict: +@rate_limit(rate=100, per=60) +async def get_execution_receipt(request: Request, execution_id: str) -> dict: """Get verifiable receipt for completed execution""" try: diff --git a/apps/coordinator-api/src/app/routers/agent_integration_router.py b/apps/coordinator-api/src/app/routers/agent_integration_router.py index a12ac859..a8394e78 100755 --- a/apps/coordinator-api/src/app/routers/agent_integration_router.py +++ b/apps/coordinator-api/src/app/routers/agent_integration_router.py @@ -5,9 +5,10 @@ Agent Integration and Deployment API Router for Verifiable AI Agent Orchestratio Provides REST API endpoints for production deployment and integration management """ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from aitbc import get_logger +from aitbc.rate_limiting import rate_limit logger = get_logger(__name__) @@ -31,7 +32,9 @@ router = APIRouter(prefix="/agents/integration", tags=["Agent Integration"]) @router.post("/deployments/config", response_model=AgentDeploymentConfig) +@rate_limit(rate=20, per=60) async def create_deployment_config( + request: Request, workflow_id: str, deployment_name: str, deployment_config: dict, @@ -65,7 +68,9 @@ async def create_deployment_config( @router.get("/deployments/configs", response_model=list[AgentDeploymentConfig]) +@rate_limit(rate=200, per=60) async def list_deployment_configs( + request: Request, workflow_id: str | None = None, status: DeploymentStatus | None = None, session: Session = Depends(Annotated[Session, Depends(get_session)]), @@ -99,7 +104,9 @@ async def list_deployment_configs( @router.get("/deployments/configs/{config_id}", response_model=AgentDeploymentConfig) +@rate_limit(rate=200, per=60) async def get_deployment_config( + request: Request, config_id: str, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()), @@ -126,7 +133,9 @@ async def get_deployment_config( @router.post("/deployments/{config_id}/deploy") +@rate_limit(rate=20, per=60) async def deploy_workflow( + request: Request, config_id: str, target_environment: str = "production", session: Session = Depends(Annotated[Session, Depends(get_session)]), @@ -160,7 +169,9 @@ async def deploy_workflow( @router.get("/deployments/{config_id}/health") +@rate_limit(rate=200, per=60) async def get_deployment_health( + request: Request, config_id: str, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()), @@ -190,7 +201,9 @@ async def get_deployment_health( @router.post("/deployments/{config_id}/scale") +@rate_limit(rate=20, per=60) async def scale_deployment( + request: Request, config_id: str, target_instances: int, session: Session = Depends(Annotated[Session, Depends(get_session)]), @@ -224,7 +237,9 @@ async def scale_deployment( @router.post("/deployments/{config_id}/rollback") +@rate_limit(rate=20, per=60) async def rollback_deployment( + request: Request, config_id: str, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()), @@ -255,7 +270,9 @@ async def rollback_deployment( @router.get("/deployments/instances", response_model=list[AgentDeploymentInstance]) +@rate_limit(rate=200, per=60) async def list_deployment_instances( + request: Request, deployment_id: str | None = None, environment: str | None = None, status: DeploymentStatus | None = None, @@ -295,7 +312,9 @@ async def list_deployment_instances( @router.get("/deployments/instances/{instance_id}", response_model=AgentDeploymentInstance) +@rate_limit(rate=200, per=60) async def get_deployment_instance( + request: Request, instance_id: str, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()), @@ -326,7 +345,9 @@ async def get_deployment_instance( @router.post("/integrations/zk/{execution_id}") +@rate_limit(rate=20, per=60) async def integrate_with_zk_system( + request: Request, execution_id: str, verification_level: VerificationLevel = VerificationLevel.BASIC, session: Session = Depends(Annotated[Session, Depends(get_session)]), @@ -360,7 +381,9 @@ async def integrate_with_zk_system( @router.get("/metrics/deployments/{deployment_id}") +@rate_limit(rate=200, per=60) async def get_deployment_metrics( + request: Request, deployment_id: str, time_range: str = "1h", session: Session = Depends(Annotated[Session, Depends(get_session)]), @@ -391,7 +414,9 @@ async def get_deployment_metrics( @router.post("/production/deploy") +@rate_limit(rate=20, per=60) async def deploy_to_production( + request: Request, workflow_id: str, deployment_config: dict, integration_config: dict | None = None, @@ -425,8 +450,9 @@ async def deploy_to_production( @router.get("/production/dashboard") +@rate_limit(rate=200, per=60) async def get_production_dashboard( - session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()) + request: Request, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()) ) -> dict[str, Any]: """Get comprehensive production dashboard data""" @@ -479,8 +505,9 @@ async def get_production_dashboard( @router.get("/production/health") +@rate_limit(rate=1000, per=60) async def get_production_health( - session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()) + request: Request, session: Session = Depends(Annotated[Session, Depends(get_session)]), current_user: str = Depends(require_admin_key()) ) -> dict[str, Any]: """Get overall production health status""" @@ -549,7 +576,9 @@ async def get_production_health( @router.get("/production/alerts") +@rate_limit(rate=200, per=60) async def get_production_alerts( + request: Request, severity: str | None = None, limit: int = 50, current_user: str = Depends(require_admin_key()), diff --git a/apps/coordinator-api/src/app/routers/client.py b/apps/coordinator-api/src/app/routers/client.py index fc783818..baea0e65 100755 --- a/apps/coordinator-api/src/app/routers/client.py +++ b/apps/coordinator-api/src/app/routers/client.py @@ -4,11 +4,10 @@ from datetime import datetime, timezone from typing import Annotated from fastapi import APIRouter, Depends, HTTPException, Request, status -from slowapi import Limiter -from slowapi.util import get_remote_address from sqlalchemy.orm import Session from aitbc import get_logger, AITBCHTTPClient, NetworkError +from aitbc.rate_limiting import rate_limit from ..config import settings from ..custom_types import JobState @@ -21,12 +20,11 @@ from ..utils.cache import cached, get_cache_config logger = get_logger(__name__) -limiter = Limiter(key_func=get_remote_address) router = APIRouter(tags=["client"]) @router.post("/jobs", response_model=JobView, status_code=status.HTTP_201_CREATED, summary="Submit a job") -@limiter.limit(lambda: settings.rate_limit_jobs_submit) +@rate_limit(rate=50, per=60) async def submit_job( req: JobCreate, request: Request, @@ -55,8 +53,9 @@ async def submit_job( @router.get("/jobs/{job_id}", response_model=JobView, summary="Get job status") +@rate_limit(rate=200, per=60) async def get_job( - job_id: str, + request: Request, job_id: str, session: Annotated[Session, Depends(get_session)], client_id: str = Depends(require_client_key()), ) -> JobView: # type: ignore[arg-type] @@ -69,8 +68,9 @@ async def get_job( @router.get("/jobs/{job_id}/result", response_model=JobResult, summary="Get job result") +@rate_limit(rate=200, per=60) async def get_job_result( - job_id: str, + request: Request, job_id: str, session: Annotated[Session, Depends(get_session)], client_id: str = Depends(require_client_key()), ) -> JobResult: # type: ignore[arg-type] @@ -88,8 +88,9 @@ async def get_job_result( @router.post("/jobs/{job_id}/cancel", response_model=JobView, summary="Cancel job") +@rate_limit(rate=50, per=60) async def cancel_job( - job_id: str, + request: Request, job_id: str, session: Annotated[Session, Depends(get_session)], client_id: str = Depends(require_client_key()), ) -> JobView: # type: ignore[arg-type] @@ -107,8 +108,9 @@ async def cancel_job( @router.get("/jobs/{job_id}/receipt", summary="Get latest signed receipt") +@rate_limit(rate=200, per=60) async def get_job_receipt( - job_id: str, + request: Request, job_id: str, session: Annotated[Session, Depends(get_session)], client_id: str = Depends(require_client_key()), ) -> dict: # type: ignore[arg-type] @@ -123,8 +125,9 @@ async def get_job_receipt( @router.get("/jobs/{job_id}/receipts", summary="List signed receipts") +@rate_limit(rate=200, per=60) async def list_job_receipts( - job_id: str, + request: Request, job_id: str, session: Annotated[Session, Depends(get_session)], client_id: str = Depends(require_client_key()), ) -> dict: # type: ignore[arg-type] @@ -134,6 +137,7 @@ async def list_job_receipts( @router.get("/jobs", summary="List jobs with filtering") +@rate_limit(rate=200, per=60) @cached(**get_cache_config("job_list")) # Cache job list for 30 seconds async def list_jobs( request: Request, @@ -164,6 +168,7 @@ async def list_jobs( @router.get("/jobs/history", summary="Get job history") +@rate_limit(rate=200, per=60) @cached(**get_cache_config("job_list")) # Cache job history for 30 seconds async def get_job_history( request: Request, @@ -216,6 +221,7 @@ async def get_job_history( @router.get("/blocks", summary="Get blockchain blocks") +@rate_limit(rate=200, per=60) async def get_blocks( request: Request, session: Annotated[Session, Depends(get_session)], @@ -252,7 +258,8 @@ async def get_blocks( # Temporary agent endpoints added to client router until agent router issue is resolved @router.post("/agents/networks", response_model=dict, status_code=201) -async def create_agent_network(network_data: dict) -> dict: +@rate_limit(rate=20, per=60) +async def create_agent_network(request: Request, network_data: dict) -> dict: """Create a new agent network for collaborative processing""" try: @@ -286,7 +293,8 @@ async def create_agent_network(network_data: dict) -> dict: @router.get("/agents/executions/{execution_id}/receipt") -async def get_execution_receipt(execution_id: str) -> dict: +@rate_limit(rate=200, per=60) +async def get_execution_receipt(request: Request, execution_id: str) -> dict: """Get verifiable receipt for completed execution""" try: diff --git a/apps/coordinator-api/src/app/routers/community.py b/apps/coordinator-api/src/app/routers/community.py index cfadcbdd..cdc07fd3 100755 --- a/apps/coordinator-api/src/app/routers/community.py +++ b/apps/coordinator-api/src/app/routers/community.py @@ -9,9 +9,11 @@ REST API for managing hermes developer profiles, SDKs, solutions, and hackathons from typing import Any -from fastapi import APIRouter, Body, Depends, HTTPException, Query +from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request from pydantic import BaseModel, Field +from aitbc.rate_limiting import rate_limit + from aitbc import get_logger logger = get_logger(__name__) @@ -84,7 +86,8 @@ class HackathonCreateRequest(BaseModel): # Endpoints - Developer Ecosystem @router.post("/developers", response_model=DeveloperProfile) -async def create_developer_profile(request: DeveloperProfileCreate, session: Annotated[Session, Depends(get_session)]) -> DeveloperProfile: +@rate_limit(rate=10, per=60) +async def create_developer_profile(request: DeveloperProfileCreate, request_http: Request, session: Annotated[Session, Depends(get_session)]) -> DeveloperProfile: """Register a new developer in the hermes ecosystem""" service = DeveloperEcosystemService(session) try: @@ -98,7 +101,8 @@ async def create_developer_profile(request: DeveloperProfileCreate, session: Ann @router.get("/developers/{developer_id}", response_model=DeveloperProfile) -async def get_developer_profile(developer_id: str, session: Annotated[Session, Depends(get_session)]) -> DeveloperProfile: +@rate_limit(rate=100, per=60) +async def get_developer_profile(developer_id: str, request: Request, session: Annotated[Session, Depends(get_session)]) -> DeveloperProfile: """Get a developer's profile and reputation""" service = DeveloperEcosystemService(session) profile = await service.get_developer_profile(developer_id) @@ -108,7 +112,8 @@ async def get_developer_profile(developer_id: str, session: Annotated[Session, D @router.get("/sdk/latest") -async def get_latest_sdk(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=100, per=60) +async def get_latest_sdk(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Get information about the latest hermes SDK releases""" service = DeveloperEcosystemService(session) return await service.get_sdk_release_info() @@ -116,7 +121,8 @@ async def get_latest_sdk(session: Annotated[Session, Depends(get_session)]) -> d # Endpoints - Marketplace Solutions @router.post("/solutions/publish", response_model=AgentSolution) -async def publish_solution(request: SolutionPublishRequest, session: Annotated[Session, Depends(get_session)]) -> AgentSolution: +@rate_limit(rate=10, per=60) +async def publish_solution(request: SolutionPublishRequest, request_http: Request, session: Annotated[Session, Depends(get_session)]) -> AgentSolution: """Publish a new third-party agent solution to the marketplace""" service = ThirdPartySolutionService(session) try: @@ -128,9 +134,11 @@ async def publish_solution(request: SolutionPublishRequest, session: Annotated[S @router.get("/solutions", response_model=list[AgentSolution]) +@rate_limit(rate=100, per=60) async def list_solutions( category: str | None = None, limit: int = 50, + request: Request, session: Annotated[Session, Depends(get_session)] = Depends(get_session), ) -> list[AgentSolution]: """List available third-party agent solutions""" @@ -139,8 +147,9 @@ async def list_solutions( @router.post("/solutions/{solution_id}/purchase") +@rate_limit(rate=20, per=60) async def purchase_solution( - solution_id: str, session: Annotated[Session, Depends(get_session)], buyer_id: str = Body(embed=True) + solution_id: str, request: Request, session: Annotated[Session, Depends(get_session)], buyer_id: str = Body(embed=True) ) -> dict[str, Any]: """Purchase or install a third-party solution""" service = ThirdPartySolutionService(session) @@ -155,9 +164,11 @@ async def purchase_solution( # Endpoints - Innovation Labs @router.post("/labs/propose", response_model=InnovationLab) +@rate_limit(rate=10, per=60) async def propose_innovation_lab( researcher_id: str = Query(...), request: LabProposalRequest = Body(...), + request_http: Request, session: Annotated[Session, Depends(get_session)] = Depends(get_session), ) -> InnovationLab: """Propose a new agent innovation lab or research program""" @@ -170,8 +181,9 @@ async def propose_innovation_lab( @router.post("/labs/{lab_id}/join") +@rate_limit(rate=20, per=60) async def join_innovation_lab( - lab_id: str, session: Annotated[Session, Depends(get_session)], developer_id: str = Body(embed=True) + lab_id: str, request: Request, session: Annotated[Session, Depends(get_session)], developer_id: str = Body(embed=True) ) -> InnovationLab: """Join an active innovation lab""" service = InnovationLabService(session) @@ -183,8 +195,9 @@ async def join_innovation_lab( @router.post("/labs/{lab_id}/fund") +@rate_limit(rate=20, per=60) async def fund_innovation_lab( - lab_id: str, session: Annotated[Session, Depends(get_session)], amount: float = Body(embed=True) + lab_id: str, request: Request, session: Annotated[Session, Depends(get_session)], amount: float = Body(embed=True) ) -> InnovationLab: """Provide funding to a proposed innovation lab""" service = InnovationLabService(session) @@ -197,9 +210,11 @@ async def fund_innovation_lab( # Endpoints - Community Platform @router.post("/platform/posts", response_model=CommunityPost) +@rate_limit(rate=20, per=60) async def create_community_post( author_id: str = Query(...), request: PostCreateRequest = Body(...), + request_http: Request, session: Annotated[Session, Depends(get_session)] = Depends(get_session), ) -> CommunityPost: """Create a new post in the community forum""" @@ -212,9 +227,11 @@ async def create_community_post( @router.get("/platform/feed", response_model=list[CommunityPost]) +@rate_limit(rate=100, per=60) async def get_community_feed( category: str | None = None, limit: int = 20, + request: Request, session: Annotated[Session, Depends(get_session)] = Depends(get_session), ) -> list[CommunityPost]: """Get the latest community posts and discussions""" @@ -223,7 +240,8 @@ async def get_community_feed( @router.post("/platform/posts/{post_id}/upvote") -async def upvote_community_post(post_id: str, session: Annotated[Session, Depends(get_session)]) -> CommunityPost: +@rate_limit(rate=50, per=60) +async def upvote_community_post(post_id: str, request: Request, session: Annotated[Session, Depends(get_session)]) -> CommunityPost: """Upvote a community post (rewards author reputation)""" service = CommunityPlatformService(session) try: @@ -235,9 +253,11 @@ async def upvote_community_post(post_id: str, session: Annotated[Session, Depend # Endpoints - Hackathons @router.post("/hackathons/create", response_model=Hackathon) +@rate_limit(rate=10, per=60) async def create_hackathon( organizer_id: str = Query(...), request: HackathonCreateRequest = Body(...), + request_http: Request, session: Annotated[Session, Depends(get_session)] = Depends(get_session), ) -> Hackathon: """Create a new agent innovation hackathon (requires high reputation)""" @@ -252,8 +272,9 @@ async def create_hackathon( @router.post("/hackathons/{hackathon_id}/register") +@rate_limit(rate=20, per=60) async def register_for_hackathon( - hackathon_id: str, session: Annotated[Session, Depends(get_session)], developer_id: str = Body(embed=True) + hackathon_id: str, request: Request, session: Annotated[Session, Depends(get_session)], developer_id: str = Body(embed=True) ) -> Hackathon: """Register for an upcoming or ongoing hackathon""" service = CommunityPlatformService(session) diff --git a/apps/coordinator-api/src/app/routers/cross_chain_integration.py b/apps/coordinator-api/src/app/routers/cross_chain_integration.py index 75f61198..5febf109 100755 --- a/apps/coordinator-api/src/app/routers/cross_chain_integration.py +++ b/apps/coordinator-api/src/app/routers/cross_chain_integration.py @@ -7,9 +7,11 @@ from datetime import datetime, timezone from typing import Any from uuid import uuid4 -from fastapi import APIRouter, Depends, HTTPException, Query +from fastapi import APIRouter, Depends, HTTPException, Query, Request from sqlmodel import Session +from aitbc.rate_limiting import rate_limit + from ..agent_identity.manager import AgentIdentityManager from ..agent_identity.wallet_adapter_enhanced import ( SecurityLevel, @@ -45,7 +47,9 @@ def get_reputation_engine(session: Session = Depends(get_session)) -> CrossChain # Enhanced Wallet Adapter Endpoints @router.post("/wallets/create", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def create_enhanced_wallet( + request: Request, owner_address: str, chain_id: int, security_config: dict[str, Any], @@ -87,7 +91,9 @@ async def create_enhanced_wallet( @router.get("/wallets/{wallet_address}/balance", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_wallet_balance( + request: Request, wallet_address: str, chain_id: int, token_address: str | None = Query(None), session: Session = Depends(get_session) ) -> dict[str, Any]: """Get wallet balance with multi-token support""" @@ -110,7 +116,9 @@ async def get_wallet_balance( @router.post("/wallets/{wallet_address}/transactions", response_model=dict[str, Any]) +@rate_limit(rate=50, per=60) async def execute_wallet_transaction( + request: Request, wallet_address: str, chain_id: int, to_address: str, @@ -149,7 +157,9 @@ async def execute_wallet_transaction( @router.get("/wallets/{wallet_address}/transactions", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def get_wallet_transaction_history( + request: Request, wallet_address: str, chain_id: int, limit: int = Query(100, ge=1, le=1000), @@ -178,7 +188,9 @@ async def get_wallet_transaction_history( @router.post("/wallets/{wallet_address}/sign", response_model=dict[str, Any]) +@rate_limit(rate=50, per=60) async def sign_message( + request: Request, wallet_address: str, chain_id: int, message: str, session: Session = Depends(get_session) ) -> dict[str, Any]: """Sign a message with wallet""" @@ -200,7 +212,9 @@ async def sign_message( @router.post("/wallets/verify-signature", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def verify_signature( + request: Request, message: str, signature: str, address: str, chain_id: int, session: Session = Depends(get_session) ) -> dict[str, Any]: """Verify a message signature""" @@ -226,7 +240,9 @@ async def verify_signature( # Cross-Chain Bridge Endpoints @router.post("/bridge/create-request", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def create_bridge_request( + request: Request, user_address: str, source_chain_id: int, target_chain_id: int, @@ -268,7 +284,8 @@ async def create_bridge_request( @router.get("/bridge/request/{bridge_request_id}", response_model=dict[str, Any]) -async def get_bridge_request_status(bridge_request_id: str, session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_bridge_request_status(request: Request, bridge_request_id: str, session: Session = Depends(get_session)) -> dict[str, Any]: """Get status of a bridge request""" try: @@ -285,7 +302,9 @@ async def get_bridge_request_status(bridge_request_id: str, session: Session = D @router.post("/bridge/request/{bridge_request_id}/cancel", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def cancel_bridge_request( + request: Request, bridge_request_id: str, reason: str, session: Session = Depends(get_session) ) -> dict[str, Any]: """Cancel a bridge request""" @@ -304,7 +323,9 @@ async def cancel_bridge_request( @router.get("/bridge/statistics", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_bridge_statistics( + request: Request, time_period_hours: int = Query(24, ge=1, le=8760), session: Session = Depends(get_session) ) -> dict[str, Any]: """Get bridge statistics""" @@ -323,7 +344,8 @@ async def get_bridge_statistics( @router.get("/bridge/liquidity-pools", response_model=list[dict[str, Any]]) -async def get_liquidity_pools(session: Session = Depends(get_session)) -> list[dict[str, Any]]: +@rate_limit(rate=200, per=60) +async def get_liquidity_pools(request: Request, session: Session = Depends(get_session)) -> list[dict[str, Any]]: """Get all liquidity pool information""" try: @@ -341,7 +363,9 @@ async def get_liquidity_pools(session: Session = Depends(get_session)) -> list[d # Multi-Chain Transaction Manager Endpoints @router.post("/transactions/submit", response_model=dict[str, Any]) +@rate_limit(rate=50, per=60) async def submit_transaction( + request: Request, user_id: str, chain_id: int, transaction_type: TransactionType, @@ -395,7 +419,8 @@ async def submit_transaction( @router.get("/transactions/{transaction_id}", response_model=dict[str, Any]) -async def get_transaction_status(transaction_id: str, session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_transaction_status(request: Request, transaction_id: str, session: Session = Depends(get_session)) -> dict[str, Any]: """Get detailed transaction status""" try: @@ -416,7 +441,8 @@ async def get_transaction_status(transaction_id: str, session: Session = Depends @router.post("/transactions/{transaction_id}/cancel", response_model=dict[str, Any]) -async def cancel_transaction(transaction_id: str, reason: str, session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=20, per=60) +async def cancel_transaction(request: Request, transaction_id: str, reason: str, session: Session = Depends(get_session)) -> dict[str, Any]: """Cancel a transaction""" try: @@ -437,7 +463,9 @@ async def cancel_transaction(transaction_id: str, reason: str, session: Session @router.get("/transactions/history", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def get_transaction_history( + request: Request, user_id: str | None = Query(None), chain_id: int | None = Query(None), transaction_type: TransactionType | None = Query(None), @@ -479,7 +507,9 @@ async def get_transaction_history( @router.get("/transactions/statistics", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_transaction_statistics( + request: Request, time_period_hours: int = Query(24, ge=1, le=8760), chain_id: int | None = Query(None), session: Session = Depends(get_session), @@ -504,7 +534,9 @@ async def get_transaction_statistics( @router.post("/transactions/optimize-routing", response_model=dict[str, Any]) +@rate_limit(rate=50, per=60) async def optimize_transaction_routing( + request: Request, transaction_type: TransactionType, amount: float, from_chain: int, @@ -535,7 +567,8 @@ async def optimize_transaction_routing( # Configuration and Status Endpoints @router.get("/chains/supported", response_model=list[dict[str, Any]]) -async def get_supported_chains() -> list[dict[str, Any]]: +@rate_limit(rate=500, per=60) +async def get_supported_chains(request: Request) -> list[dict[str, Any]]: """Get list of supported blockchain chains""" try: @@ -554,7 +587,8 @@ async def get_supported_chains() -> list[dict[str, Any]]: @router.get("/chains/{chain_id}/info", response_model=dict[str, Any]) -async def get_chain_info(chain_id: int, session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=500, per=60) +async def get_chain_info(request: Request, chain_id: int, session: Session = Depends(get_session)) -> dict[str, Any]: """Get information about a specific chain""" try: @@ -576,7 +610,8 @@ async def get_chain_info(chain_id: int, session: Session = Depends(get_session)) @router.get("/health", response_model=dict[str, Any]) -async def get_cross_chain_health(session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def get_cross_chain_health(request: Request, session: Session = Depends(get_session)) -> dict[str, Any]: """Get cross-chain integration health status""" try: @@ -614,7 +649,8 @@ async def get_cross_chain_health(session: Session = Depends(get_session)) -> dic @router.get("/config", response_model=dict[str, Any]) -async def get_cross_chain_config(session: Session = Depends(get_session)) -> dict[str, Any]: +@rate_limit(rate=500, per=60) +async def get_cross_chain_config(request: Request, session: Session = Depends(get_session)) -> dict[str, Any]: """Get cross-chain integration configuration""" try: diff --git a/apps/coordinator-api/src/app/routers/developer_platform.py b/apps/coordinator-api/src/app/routers/developer_platform.py index ec5549eb..48e5fa0d 100755 --- a/apps/coordinator-api/src/app/routers/developer_platform.py +++ b/apps/coordinator-api/src/app/routers/developer_platform.py @@ -6,9 +6,11 @@ REST API endpoints for the developer ecosystem including bounties, certification from datetime import datetime, timezone from typing import Any -from fastapi import APIRouter, Depends, HTTPException, Query +from fastapi import APIRouter, Depends, HTTPException, Query, Request from sqlmodel import Session, func, select +from aitbc.rate_limiting import rate_limit + from ..domain.developer_platform import ( BountyStatus, CertificationLevel, @@ -35,8 +37,9 @@ def get_governance_service(session: Session = Depends(get_session)) -> Governanc # Developer Management Endpoints @router.post("/register", response_model=dict[str, Any]) +@rate_limit(rate=10, per=60) async def register_developer( - request: DeveloperCreate, + request: DeveloperCreate, request_http: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -61,8 +64,9 @@ async def register_developer( @router.get("/profile/{wallet_address}", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_developer_profile( - wallet_address: str, + wallet_address: str, request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -93,9 +97,11 @@ async def get_developer_profile( @router.put("/profile/{wallet_address}", response_model=dict[str, Any]) +@rate_limit(rate=50, per=60) async def update_developer_profile( wallet_address: str, updates: dict[str, Any], + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -119,9 +125,11 @@ async def update_developer_profile( @router.get("/leaderboard", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def get_leaderboard( limit: int = Query(100, ge=1, le=500, description="Maximum number of developers"), offset: int = Query(0, ge=0, description="Offset for pagination"), + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> list[dict[str, Any]]: @@ -149,8 +157,9 @@ async def get_leaderboard( @router.get("/stats/{wallet_address}", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_developer_stats( - wallet_address: str, + wallet_address: str, request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -168,8 +177,9 @@ async def get_developer_stats( # Bounty Management Endpoints @router.post("/bounties", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def create_bounty( - request: BountyCreate, + request: BountyCreate, request_http: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -195,10 +205,12 @@ async def create_bounty( @router.get("/bounties", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def list_bounties( status: BountyStatus | None = Query(None, description="Filter by bounty status"), limit: int = Query(100, ge=1, le=500, description="Maximum number of bounties"), offset: int = Query(0, ge=0, description="Offset for pagination"), + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> list[dict[str, Any]]: @@ -228,8 +240,9 @@ async def list_bounties( @router.get("/bounties/{bounty_id}", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_bounty_details( - bounty_id: str, + bounty_id: str, request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -246,9 +259,11 @@ async def get_bounty_details( @router.post("/bounties/{bounty_id}/submit", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def submit_bounty_solution( bounty_id: str, request: BountySubmissionCreate, + request_http: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -275,10 +290,12 @@ async def submit_bounty_solution( @router.get("/bounties/my-submissions", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def get_my_submissions( developer_id: str, limit: int = Query(100, ge=1, le=500, description="Maximum number of submissions"), offset: int = Query(0, ge=0, description="Offset for pagination"), + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> list[dict[str, Any]]: @@ -308,11 +325,13 @@ async def get_my_submissions( @router.post("/bounties/{bounty_id}/review", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def review_bounty_submission( submission_id: str, reviewer_address: str, review_notes: str, approved: bool = Query(True, description="Whether to approve the submission"), + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -344,8 +363,9 @@ async def review_bounty_submission( @router.get("/bounties/stats", response_model=dict[str, Any]) +@rate_limit(rate=200, per=60) async def get_bounty_statistics( - session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service) + request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service) ) -> dict[str, Any]: """Get comprehensive bounty statistics""" @@ -359,8 +379,9 @@ async def get_bounty_statistics( # Certification Management Endpoints @router.post("/certifications", response_model=dict[str, Any]) +@rate_limit(rate=20, per=60) async def grant_certification( - request: CertificationGrant, + request: CertificationGrant, request_http: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> dict[str, Any]: @@ -388,8 +409,9 @@ async def grant_certification( @router.get("/certifications/{wallet_address}", response_model=list[dict[str, Any]]) +@rate_limit(rate=200, per=60) async def get_developer_certifications( - wallet_address: str, + wallet_address: str, request: Request, session: Session = Depends(get_session), dev_service: DeveloperPlatformService = Depends(get_developer_platform_service), ) -> list[dict[str, Any]]: diff --git a/apps/coordinator-api/src/app/routers/ecosystem_dashboard.py b/apps/coordinator-api/src/app/routers/ecosystem_dashboard.py index bc3cb4cd..76d675cf 100755 --- a/apps/coordinator-api/src/app/routers/ecosystem_dashboard.py +++ b/apps/coordinator-api/src/app/routers/ecosystem_dashboard.py @@ -8,11 +8,12 @@ REST API for developer ecosystem metrics and analytics from datetime import datetime, timezone, timedelta from typing import Any, Dict, List, Optional -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel, Field from sqlalchemy.orm import Session from aitbc import get_logger +from aitbc.rate_limiting import rate_limit from ..auth import get_current_user from ..domain.bounty import AgentMetrics, BountyStats, EcosystemMetrics from ..services.ecosystem_service import EcosystemService @@ -88,7 +89,9 @@ def get_ecosystem_service(session: Session = Depends(get_session)) -> EcosystemS # API endpoints @router.get("/ecosystem/developer-earnings", response_model=DeveloperEarningsResponse) +@rate_limit(rate=200, per=60) async def get_developer_earnings( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service), @@ -108,7 +111,9 @@ async def get_developer_earnings( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/agent-utilization", response_model=AgentUtilizationResponse) +@rate_limit(rate=200, per=60) async def get_agent_utilization( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -127,7 +132,9 @@ async def get_agent_utilization( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/treasury-allocation", response_model=TreasuryAllocationResponse) +@rate_limit(rate=200, per=60) async def get_treasury_allocation( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -146,7 +153,9 @@ async def get_treasury_allocation( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/staking-metrics", response_model=StakingMetricsResponse) +@rate_limit(rate=200, per=60) async def get_staking_metrics( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -165,7 +174,9 @@ async def get_staking_metrics( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/bounty-analytics", response_model=BountyAnalyticsResponse) +@rate_limit(rate=200, per=60) async def get_bounty_analytics( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -184,7 +195,9 @@ async def get_bounty_analytics( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/overview", response_model=EcosystemOverviewResponse) +@rate_limit(rate=100, per=60) async def get_ecosystem_overview( + request: Request, period_type: str = Field(default="daily", regex="^(hourly|daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -210,7 +223,9 @@ async def get_ecosystem_overview( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/metrics") +@rate_limit(rate=200, per=60) async def get_ecosystem_metrics( + request: Request, period_type: str = Field(default="daily", regex="^(hourly|daily|weekly|monthly)$"), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, @@ -238,9 +253,9 @@ async def get_ecosystem_metrics( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/health-score") +@rate_limit(rate=200, per=60) async def get_ecosystem_health_score( - session: Session = Depends(get_session), - ecosystem_service: EcosystemService = Depends(get_ecosystem_service) + request: Request, session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) ) -> Dict[str, Any]: """Get overall ecosystem health score""" try: @@ -258,7 +273,9 @@ async def get_ecosystem_health_score( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/growth-indicators") +@rate_limit(rate=200, per=60) async def get_growth_indicators( + request: Request, period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), session: Session = Depends(get_session), ecosystem_service: EcosystemService = Depends(get_ecosystem_service) @@ -279,7 +296,9 @@ async def get_growth_indicators( raise HTTPException(status_code=400, detail=str(e)) @router.get("/ecosystem/top-performers") +@rate_limit(rate=200, per=60) async def get_top_performers( + request: Request, category: str = Field(default="all", regex="^(developers|agents|stakers|all)$"), period: str = Field(default="monthly", regex="^(daily|weekly|monthly)$"), limit: int = Field(default=50, ge=1, le=100), diff --git a/apps/coordinator-api/src/app/routers/explorer.py b/apps/coordinator-api/src/app/routers/explorer.py index 320d999b..69de2c8c 100755 --- a/apps/coordinator-api/src/app/routers/explorer.py +++ b/apps/coordinator-api/src/app/routers/explorer.py @@ -2,9 +2,11 @@ from __future__ import annotations from typing import Annotated -from fastapi import APIRouter, Depends, Query +from fastapi import APIRouter, Depends, Query, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from ..schemas import ( AddressListResponse, BlockListResponse, @@ -22,7 +24,9 @@ def _service(session: Annotated[Session, Depends(get_session)]) -> ExplorerServi @router.get("/blocks", response_model=BlockListResponse, summary="List recent blocks") +@rate_limit(rate=100, per=60) async def list_blocks( + request: Request, *, session: Annotated[Session, Depends(get_session)], limit: int = Query(default=20, ge=1, le=200), @@ -36,7 +40,9 @@ async def list_blocks( response_model=TransactionListResponse, summary="List recent transactions", ) +@rate_limit(rate=100, per=60) async def list_transactions( + request: Request, *, session: Annotated[Session, Depends(get_session)], limit: int = Query(default=50, ge=1, le=200), @@ -46,7 +52,9 @@ async def list_transactions( @router.get("/addresses", response_model=AddressListResponse, summary="List address summaries") +@rate_limit(rate=100, per=60) async def list_addresses( + request: Request, *, session: Annotated[Session, Depends(get_session)], limit: int = Query(default=50, ge=1, le=200), @@ -56,7 +64,9 @@ async def list_addresses( @router.get("/receipts", response_model=ReceiptListResponse, summary="List job receipts") +@rate_limit(rate=100, per=60) async def list_receipts( + request: Request, *, session: Annotated[Session, Depends(get_session)], job_id: str | None = Query(default=None, description="Filter by job identifier"), @@ -67,7 +77,9 @@ async def list_receipts( @router.get("/transactions/{tx_hash}", summary="Get transaction details by hash") +@rate_limit(rate=100, per=60) async def get_transaction( + request: Request, *, session: Annotated[Session, Depends(get_session)], tx_hash: str, diff --git a/apps/coordinator-api/src/app/routers/gpu_multimodal_health.py b/apps/coordinator-api/src/app/routers/gpu_multimodal_health.py index 1bbb4ccf..5747887a 100755 --- a/apps/coordinator-api/src/app/routers/gpu_multimodal_health.py +++ b/apps/coordinator-api/src/app/routers/gpu_multimodal_health.py @@ -11,16 +11,19 @@ from datetime import datetime, timezone from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from ..storage import get_session router = APIRouter() @router.get("/health", tags=["health"], summary="GPU Multi-Modal Service Health") -async def gpu_multimodal_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def gpu_multimodal_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Health check for GPU Multi-Modal Service (Port 8010) """ @@ -92,7 +95,8 @@ async def gpu_multimodal_health(session: Annotated[Session, Depends(get_session) @router.get("/health/deep", tags=["health"], summary="Deep GPU Multi-Modal Service Health") -async def gpu_multimodal_deep_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def gpu_multimodal_deep_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Deep health check with CUDA performance validation """ diff --git a/apps/coordinator-api/src/app/routers/hermes_enhanced_health.py b/apps/coordinator-api/src/app/routers/hermes_enhanced_health.py index 9cb12204..0e59756c 100755 --- a/apps/coordinator-api/src/app/routers/hermes_enhanced_health.py +++ b/apps/coordinator-api/src/app/routers/hermes_enhanced_health.py @@ -10,9 +10,11 @@ from datetime import datetime, timezone from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from aitbc import get_logger from ..services.hermes_enhanced import hermesEnhancedService @@ -23,7 +25,8 @@ logger = get_logger(__name__) @router.get("/health", tags=["health"], summary="hermes Enhanced Service Health") -async def hermes_enhanced_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def hermes_enhanced_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Health check for hermes Enhanced Service (Port 8007) """ @@ -101,7 +104,8 @@ async def hermes_enhanced_health(session: Annotated[Session, Depends(get_session @router.get("/health/deep", tags=["health"], summary="Deep hermes Enhanced Service Health") -async def hermes_enhanced_deep_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def hermes_enhanced_deep_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Deep health check with hermes ecosystem validation """ diff --git a/apps/coordinator-api/src/app/routers/marketplace_enhanced_health.py b/apps/coordinator-api/src/app/routers/marketplace_enhanced_health.py index 9107adcb..2211eacb 100755 --- a/apps/coordinator-api/src/app/routers/marketplace_enhanced_health.py +++ b/apps/coordinator-api/src/app/routers/marketplace_enhanced_health.py @@ -10,9 +10,11 @@ from datetime import datetime, timezone from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from aitbc import get_logger from ..services.marketplace_enhanced import EnhancedMarketplaceService from ..storage import get_session @@ -24,7 +26,8 @@ router = APIRouter() @router.get("/health", tags=["health"], summary="Enhanced Marketplace Service Health") -async def marketplace_enhanced_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def marketplace_enhanced_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Health check for Enhanced Marketplace Service (Port 8002) """ @@ -104,7 +107,8 @@ async def marketplace_enhanced_health(session: Annotated[Session, Depends(get_se @router.get("/health/deep", tags=["health"], summary="Deep Enhanced Marketplace Service Health") -async def marketplace_enhanced_deep_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def marketplace_enhanced_deep_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Deep health check with marketplace feature validation """ diff --git a/apps/coordinator-api/src/app/routers/marketplace_performance.py b/apps/coordinator-api/src/app/routers/marketplace_performance.py index 786cec35..1b3e9efd 100755 --- a/apps/coordinator-api/src/app/routers/marketplace_performance.py +++ b/apps/coordinator-api/src/app/routers/marketplace_performance.py @@ -8,10 +8,11 @@ import os import sys from typing import Any -from fastapi import APIRouter, BackgroundTasks, HTTPException +from fastapi import APIRouter, BackgroundTasks, HTTPException, Request from pydantic import BaseModel, Field from aitbc import get_logger +from aitbc.rate_limiting import rate_limit logger = get_logger(__name__) @@ -88,7 +89,8 @@ class ScalingPolicyUpdate(BaseModel): # Endpoints: GPU Optimization @router.post("/gpu/allocate") -async def allocate_gpu_resources(request: GPUAllocationRequest) -> dict[str, Any]: +@rate_limit(rate=50, per=60) +async def allocate_gpu_resources(request: Request, gpu_request: GPUAllocationRequest) -> dict[str, Any]: """Request optimal GPU resource allocation for a marketplace task""" try: start_time = time.time() @@ -108,7 +110,8 @@ async def allocate_gpu_resources(request: GPUAllocationRequest) -> dict[str, Any @router.post("/gpu/release") -async def release_gpu_resources(request: GPUReleaseRequest) -> dict[str, str]: +@rate_limit(rate=50, per=60) +async def release_gpu_resources(request: Request, gpu_request: GPUReleaseRequest) -> dict[str, str]: """Release previously allocated GPU resources""" success = gpu_optimizer.release_resources(request.job_id) if not success: @@ -117,14 +120,16 @@ async def release_gpu_resources(request: GPUReleaseRequest) -> dict[str, str]: @router.get("/gpu/status") -async def get_gpu_status() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_gpu_status(request: Request) -> dict[str, Any]: """Get overall GPU fleet status and optimization metrics""" return gpu_optimizer.get_system_status() # Endpoints: Distributed Processing @router.post("/distributed/task") -async def submit_distributed_task(request: DistributedTaskRequest) -> dict[str, str]: +@rate_limit(rate=50, per=60) +async def submit_distributed_task(request: Request, task_request: DistributedTaskRequest) -> dict[str, str]: """Submit a task to the distributed processing framework""" task = DistributedTask( task_id=None, @@ -140,7 +145,8 @@ async def submit_distributed_task(request: DistributedTaskRequest) -> dict[str, @router.get("/distributed/task/{task_id}") -async def get_distributed_task_status(task_id: str) -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_distributed_task_status(request: Request, task_id: str) -> dict[str, Any]: """Check the status and get results of a distributed task""" status = await distributed_coordinator.get_task_status(task_id) if not status: @@ -149,7 +155,8 @@ async def get_distributed_task_status(task_id: str) -> dict[str, Any]: @router.post("/distributed/worker/register") -async def register_worker(request: WorkerRegistrationRequest) -> dict[str, str]: +@rate_limit(rate=20, per=60) +async def register_worker(request: Request, worker_request: WorkerRegistrationRequest) -> dict[str, str]: """Register a new worker node in the cluster""" distributed_coordinator.register_worker( worker_id=request.worker_id, @@ -161,14 +168,16 @@ async def register_worker(request: WorkerRegistrationRequest) -> dict[str, str]: @router.get("/distributed/status") -async def get_cluster_status() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_cluster_status(request: Request) -> dict[str, Any]: """Get overall distributed cluster health and load""" return distributed_coordinator.get_cluster_status() # Endpoints: Caching @router.get("/cache/stats") -async def get_cache_stats() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_cache_stats(request: Request) -> dict[str, Any]: """Get current caching performance statistics""" return { "status": "connected" if cache_optimizer.is_connected else "local_only", @@ -178,7 +187,8 @@ async def get_cache_stats() -> dict[str, Any]: @router.post("/cache/invalidate/{namespace}") -async def invalidate_cache_namespace(namespace: str, background_tasks: BackgroundTasks) -> dict[str, str]: +@rate_limit(rate=20, per=60) +async def invalidate_cache_namespace(request: Request, namespace: str, background_tasks: BackgroundTasks) -> dict[str, str]: """Invalidate a specific cache namespace (e.g., 'order_book')""" background_tasks.add_task(cache_optimizer.invalidate_namespace, namespace) return {"success": True, "message": f"Invalidation for {namespace} queued"} @@ -186,20 +196,23 @@ async def invalidate_cache_namespace(namespace: str, background_tasks: Backgroun # Endpoints: Monitoring @router.get("/monitor/dashboard") -async def get_monitoring_dashboard() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_monitoring_dashboard(request: Request) -> dict[str, Any]: """Get real-time performance dashboard data""" return marketplace_monitor.get_realtime_dashboard_data() # Endpoints: Auto-scaling @router.get("/scaler/status") -async def get_scaler_status() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def get_scaler_status(request: Request) -> dict[str, Any]: """Get current auto-scaler status and active rules""" return resource_scaler.get_status() @router.post("/scaler/policy") -async def update_scaling_policy(policy_update: ScalingPolicyUpdate) -> dict[str, str]: +@rate_limit(rate=20, per=60) +async def update_scaling_policy(request: Request, policy_update: ScalingPolicyUpdate) -> dict[str, str]: """Update auto-scaling thresholds and parameters dynamically""" current_policy = resource_scaler.policy diff --git a/apps/coordinator-api/src/app/routers/monitor.py b/apps/coordinator-api/src/app/routers/monitor.py index 6d209ebf..5efe01e2 100644 --- a/apps/coordinator-api/src/app/routers/monitor.py +++ b/apps/coordinator-api/src/app/routers/monitor.py @@ -1,13 +1,16 @@ """Monitor router for AITBC Coordinator API.""" -from fastapi import APIRouter +from fastapi import APIRouter, Request from typing import List, Dict +from aitbc.rate_limiting import rate_limit + router = APIRouter(tags=["Monitor"]) @router.get("/api/v1/dashboard", response_model=dict) -async def get_dashboard(): +@rate_limit(rate=100, per=60) +async def get_dashboard(request: Request): """Get monitoring dashboard data.""" return { "overall_status": "operational", @@ -26,7 +29,8 @@ async def get_dashboard(): @router.get("/status", response_model=dict) -async def get_status(): +@rate_limit(rate=100, per=60) +async def get_status(request: Request): """Get coordinator status.""" return { "status": "online", @@ -37,18 +41,21 @@ async def get_status(): @router.get("/miners", response_model=List[Dict]) -async def get_miners(): +@rate_limit(rate=50, per=60) +async def get_miners(request: Request): """Get miners list.""" return [] @router.get("/dashboard", response_model=List[Dict]) -async def get_history_dashboard(): +@rate_limit(rate=50, per=60) +async def get_history_dashboard(request: Request): """Get historical dashboard data.""" return [] @router.get("/jobs", response_model=List[Dict]) -async def get_jobs(): +@rate_limit(rate=50, per=60) +async def get_jobs(request: Request): """Get jobs list for history and metrics commands.""" return [] diff --git a/apps/coordinator-api/src/app/routers/monitoring_dashboard.py b/apps/coordinator-api/src/app/routers/monitoring_dashboard.py index 880edd75..893425fe 100755 --- a/apps/coordinator-api/src/app/routers/monitoring_dashboard.py +++ b/apps/coordinator-api/src/app/routers/monitoring_dashboard.py @@ -7,9 +7,11 @@ import asyncio from datetime import datetime, timezone from typing import Any -from fastapi import APIRouter +from fastapi import APIRouter, Depends, HTTPException, Query, Request +from sqlmodel import Session from aitbc import get_logger, AITBCHTTPClient, NetworkError +from aitbc.rate_limiting import rate_limit logger = get_logger(__name__) @@ -63,7 +65,8 @@ SERVICES = { @router.get("/dashboard", tags=["monitoring"], summary="Enhanced Services Dashboard") -async def monitoring_dashboard() -> dict[str, Any]: +@rate_limit(rate=200, per=60) +async def monitoring_dashboard(request: Request) -> dict[str, Any]: """ Unified monitoring dashboard for all enhanced services """ diff --git a/apps/coordinator-api/src/app/routers/multimodal_health.py b/apps/coordinator-api/src/app/routers/multimodal_health.py index a4ffad69..504572c8 100755 --- a/apps/coordinator-api/src/app/routers/multimodal_health.py +++ b/apps/coordinator-api/src/app/routers/multimodal_health.py @@ -10,9 +10,11 @@ from datetime import datetime, timezone from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from sqlalchemy.orm import Session +from aitbc.rate_limiting import rate_limit + from ..services.multimodal_agent import MultiModalAgentService from ..storage import get_session @@ -20,7 +22,8 @@ router = APIRouter() @router.get("/health", tags=["health"], summary="Multi-Modal Agent Service Health") -async def multimodal_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def multimodal_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Health check for Multi-Modal Agent Service (Port 8002) """ @@ -87,7 +90,8 @@ async def multimodal_health(session: Annotated[Session, Depends(get_session)]) - @router.get("/health/deep", tags=["health"], summary="Deep Multi-Modal Service Health") -async def multimodal_deep_health(session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=1000, per=60) +async def multimodal_deep_health(request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """ Deep health check with detailed multi-modal processing tests """ diff --git a/apps/coordinator-api/src/app/routers/partners.py b/apps/coordinator-api/src/app/routers/partners.py index 5a55b754..3b4cedd7 100755 --- a/apps/coordinator-api/src/app/routers/partners.py +++ b/apps/coordinator-api/src/app/routers/partners.py @@ -11,9 +11,11 @@ import secrets from datetime import datetime, timezone from typing import Any -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel, Field +from aitbc.rate_limiting import rate_limit + from ..storage import get_session router = APIRouter(tags=["partners"]) @@ -63,7 +65,8 @@ WEBHOOKS_DB = {} @router.post("/partners/register", response_model=PartnerResponse) -async def register_partner(partner: PartnerRegister, session: Annotated[Session, Depends(get_session)]) -> PartnerResponse: +@rate_limit(rate=10, per=60) +async def register_partner(partner: PartnerRegister, request: Request, session: Annotated[Session, Depends(get_session)]) -> PartnerResponse: """Register a new partner application""" # Generate credentials @@ -105,7 +108,8 @@ async def register_partner(partner: PartnerRegister, session: Annotated[Session, @router.get("/partners/{partner_id}") -async def get_partner(partner_id: str, session: Annotated[Session, Depends(get_session)], api_key: str) -> dict[str, Any]: +@rate_limit(rate=50, per=60) +async def get_partner(partner_id: str, request: Request, session: Annotated[Session, Depends(get_session)], api_key: str) -> dict[str, Any]: """Get partner information""" # Verify API key @@ -125,8 +129,9 @@ async def get_partner(partner_id: str, session: Annotated[Session, Depends(get_s @router.post("/partners/webhooks", response_model=WebhookResponse) +@rate_limit(rate=20, per=60) async def create_webhook( - webhook: WebhookCreate, session: Annotated[Session, Depends(get_session)], api_key: str + webhook: WebhookCreate, request: Request, session: Annotated[Session, Depends(get_session)], api_key: str ) -> WebhookResponse: """Create a webhook subscription""" @@ -175,7 +180,8 @@ async def create_webhook( @router.get("/partners/webhooks") -async def list_webhooks(session: Annotated[Session, Depends(get_session)], api_key: str) -> list[WebhookResponse]: +@rate_limit(rate=50, per=60) +async def list_webhooks(request: Request, session: Annotated[Session, Depends(get_session)], api_key: str) -> list[WebhookResponse]: """List partner webhooks""" # Verify partner @@ -201,7 +207,8 @@ async def list_webhooks(session: Annotated[Session, Depends(get_session)], api_k @router.delete("/partners/webhooks/{webhook_id}") -async def delete_webhook(webhook_id: str, session: Annotated[Session, Depends(get_session)], api_key: str) -> dict[str, str]: +@rate_limit(rate=20, per=60) +async def delete_webhook(webhook_id: str, request: Request, session: Annotated[Session, Depends(get_session)], api_key: str) -> dict[str, str]: """Delete a webhook""" # Verify partner @@ -221,8 +228,9 @@ async def delete_webhook(webhook_id: str, session: Annotated[Session, Depends(ge @router.get("/partners/analytics/usage") +@rate_limit(rate=30, per=60) async def get_usage_analytics( - session: Annotated[Session, Depends(get_session)], api_key: str, period: str = "24h" + request: Request, session: Annotated[Session, Depends(get_session)], api_key: str, period: str = "24h" ) -> dict[str, Any]: """Get API usage analytics""" diff --git a/apps/coordinator-api/src/app/routers/users.py b/apps/coordinator-api/src/app/routers/users.py index 1a126408..b87f9f86 100755 --- a/apps/coordinator-api/src/app/routers/users.py +++ b/apps/coordinator-api/src/app/routers/users.py @@ -12,9 +12,11 @@ import uuid from datetime import datetime, timezone from typing import Any -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, status, Request from sqlmodel import Session, select +from aitbc.rate_limiting import rate_limit + from ..domain import User, Wallet from ..schemas import UserBalance, UserCreate, UserLogin, UserProfile from ..storage import get_session @@ -56,7 +58,8 @@ def verify_session_token(token: str) -> str | None: @router.post("/register", response_model=UserProfile) -async def register_user(user_data: UserCreate, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=10, per=60) +async def register_user(user_data: UserCreate, request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Register a new user""" # Check if user already exists @@ -97,7 +100,8 @@ async def register_user(user_data: UserCreate, session: Annotated[Session, Depen @router.post("/login", response_model=UserProfile) -async def login_user(login_data: UserLogin, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=20, per=60) +async def login_user(login_data: UserLogin, request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Login user with wallet address""" # For demo, we'll create or get user by wallet address @@ -144,7 +148,8 @@ async def login_user(login_data: UserLogin, session: Annotated[Session, Depends( @router.get("/users/me", response_model=UserProfile) -async def get_current_user(token: str, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=100, per=60) +async def get_current_user(token: str, request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Get current user profile""" user_id = verify_session_token(token) @@ -165,7 +170,8 @@ async def get_current_user(token: str, session: Annotated[Session, Depends(get_s @router.get("/users/{user_id}/balance", response_model=UserBalance) -async def get_user_balance(user_id: str, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=50, per=60) +async def get_user_balance(user_id: str, request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Get user's AITBC balance""" wallet = session.execute(select(Wallet).where(Wallet.user_id == user_id)).first() @@ -182,7 +188,8 @@ async def get_user_balance(user_id: str, session: Annotated[Session, Depends(get @router.post("/logout") -async def logout_user(token: str) -> dict[str, str]: +@rate_limit(rate=20, per=60) +async def logout_user(token: str, request: Request) -> dict[str, str]: """Logout user and invalidate session""" if token in user_sessions: @@ -192,7 +199,8 @@ async def logout_user(token: str) -> dict[str, str]: @router.get("/users/{user_id}/transactions") -async def get_user_transactions(user_id: str, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: +@rate_limit(rate=50, per=60) +async def get_user_transactions(user_id: str, request: Request, session: Annotated[Session, Depends(get_session)]) -> dict[str, Any]: """Get user's transaction history""" # For demo, return empty list diff --git a/apps/coordinator-api/src/app/routers/web_vitals.py b/apps/coordinator-api/src/app/routers/web_vitals.py index 9c3305e5..2608801f 100755 --- a/apps/coordinator-api/src/app/routers/web_vitals.py +++ b/apps/coordinator-api/src/app/routers/web_vitals.py @@ -5,10 +5,12 @@ Web Vitals API endpoint for collecting performance metrics from typing import Any -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from pydantic import BaseModel, Field +from sqlalchemy.orm import Session from aitbc import get_logger +from aitbc.rate_limiting import rate_limit from ..auth import get_api_key logger = get_logger(__name__) @@ -35,7 +37,8 @@ class WebVitalsMetric(BaseModel): @router.post("/web-vitals") -async def collect_web_vitals(metric: WebVitalsMetric) -> dict[str, Any]: +@rate_limit(rate=100, per=60) +async def collect_web_vitals(request: Request, metric: WebVitalsMetric) -> dict[str, Any]: """ Collect Web Vitals performance metrics from the frontend. This endpoint receives Core Web Vitals (LCP, FID, CLS, TTFB, FCP) for monitoring. @@ -79,6 +82,7 @@ async def collect_web_vitals(metric: WebVitalsMetric) -> dict[str, Any]: # Health check for web vitals endpoint @router.get("/web-vitals/health") -async def web_vitals_health() -> dict[str, str]: +@rate_limit(rate=1000, per=60) +async def web_vitals_health(request: Request) -> dict[str, str]: """Health check for web vitals collection endpoint""" return {"status": "healthy", "service": "web-vitals"} diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index fd4f492d..da12b50e 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -167,12 +167,45 @@ - Helper functions: is_feature_enabled(), get_feature_flag_manager() - Comprehensive tests: tests/test_feature_flags.py (30+ test cases, 404 lines) - Features: gradual rollouts, user whitelisting/blacklisting, percentage-based targeting, timestamp tracking - - [ ] Implement comprehensive observability - - [ ] Design contract upgrade pattern + - [DONE] Implement comprehensive observability - COMPLETED + - aitbc/metrics.py implements Prometheus metrics (Counter, Histogram, Gauge, Info) + - Metrics for: block processing, job processing, API requests, uptime, service info + - Decorators: @track_block_processing, @track_job_processing, @track_http_request + - Helper functions: update_block_height, update_jobs_in_queue, increment_service_restarts + - ASGI metrics endpoint via make_asgi_app() + - aitbc/monitoring.py implements MetricsCollector, PerformanceTimer, HealthChecker + - Health checks with overall status calculation (healthy, degraded, unhealthy) + - Alerting exists in apps/agent-coordinator/src/app/monitoring/alerting.py and apps/coordinator-api/src/app/utils/alerting.py + - Comprehensive tests: tests/test_metrics.py (30+ test cases, 251 lines), tests/test_monitoring.py (30+ test cases, 353 lines) + - Enhanced aitbc/aitbc_logging.py with structured JSON logging (StructuredFormatter, log_context, LogContext) + - Created aitbc/tracing.py for OpenTelemetry-based distributed tracing + - Tracing features: setup_tracing, instrument_fastapi, instrument_httpx, instrument_sqlalchemy + - Decorators: trace_function, trace_async_function for automatic instrumentation + - Context manager: trace_span for manual span creation + - Created aitbc/alerting.py for centralized alerting system (AlertManager, AlertRule, AlertChannel) + - Created metrics dashboard configuration at infra/monitoring/aitbc-dashboard.json + - All observability components tested and imports verified + - [DONE] Design contract upgrade pattern - COMPLETED + - apps/blockchain-node/src/aitbc_chain/contracts/upgrades.py implements comprehensive contract upgrade system (543 lines) + - Core components: UpgradeStatus enum, UpgradeType enum, ContractVersion dataclass, UpgradeProposal dataclass + - ContractUpgradeManager with proposal creation, stake-weighted governance voting, upgrade execution, rollback mechanism + - Features: voting deadlines (3-7 days), 60% approval requirement, 30% minimum participation, emergency upgrades (80% threshold) + - Rollback window (7 days), version history tracking, upgrade statistics + - Contract examples: guardian_contract.py (683 lines), agent_messaging_contract.py (520 lines) + - Global upgrade manager singleton pattern + - Security: proposer authorization, version validation, proposal deduplication ### Distribution & Binaries -- [ ] Debian stable miner binary (build workflow exists, binary built but distribution mechanism pending) +- [DONE] Debian stable miner binary - COMPLETED + - Build workflow exists: .gitea/workflows/build-miner-binary.yml + - Binary built using PyInstaller with vLLM and Ollama support + - Package includes: binary, README.md, install.sh, verify-install.sh, miner.env.template, SHA256SUMS + - Distribution mechanism implemented: Gitea releases API integration + - Updated build workflow to create Gitea releases and upload assets automatically + - Updated README.md to reference Gitea releases instead of GitHub + - Binary and package uploaded to Gitea releases on tag push + - Checksum verification supported via SHA256SUMS file - [ ] Binary distribution via GitHub Releases (deferred until v1 release - policy: no GitHub Releases before v1) ### Quality Assurance diff --git a/infra/monitoring/aitbc-dashboard.json b/infra/monitoring/aitbc-dashboard.json new file mode 100644 index 00000000..56e74f22 --- /dev/null +++ b/infra/monitoring/aitbc-dashboard.json @@ -0,0 +1,260 @@ +{ + "dashboard": { + "title": "AITBC System Dashboard", + "tags": ["aitbc", "blockchain", "ai"], + "timezone": "browser", + "schemaVersion": 16, + "version": 0, + "refresh": "10s", + "panels": [ + { + "id": 1, + "title": "Block Processing Duration", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(block_processing_duration_seconds_bucket[5m]))", + "legendFormat": "95th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, rate(block_processing_duration_seconds_bucket[5m]))", + "legendFormat": "50th percentile", + "refId": "B" + } + ], + "yaxes": [ + { + "format": "s", + "label": "Duration" + }, + { + "format": "short" + } + ], + "xaxis": { + "mode": "time", + "name": "Time", + "show": true + } + }, + { + "id": 2, + "title": "Block Height", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "targets": [ + { + "expr": "block_height", + "legendFormat": "Block Height", + "refId": "A" + } + ], + "yaxes": [ + { + "format": "short", + "label": "Block Number" + }, + { + "format": "short" + } + ] + }, + { + "id": 3, + "title": "Job Processing Duration", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(job_processing_duration_seconds_bucket[5m]))", + "legendFormat": "95th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, rate(job_processing_duration_seconds_bucket[5m]))", + "legendFormat": "50th percentile", + "refId": "B" + } + ], + "yaxes": [ + { + "format": "s", + "label": "Duration" + }, + { + "format": "short" + } + ] + }, + { + "id": 4, + "title": "Jobs in Queue", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "targets": [ + { + "expr": "jobs_in_queue", + "legendFormat": "Queued Jobs", + "refId": "A" + } + ], + "yaxes": [ + { + "format": "short", + "label": "Job Count" + }, + { + "format": "short" + } + ] + }, + { + "id": 5, + "title": "HTTP Request Duration", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}, + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))", + "legendFormat": "95th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket[5m]))", + "legendFormat": "50th percentile", + "refId": "B" + } + ], + "yaxes": [ + { + "format": "s", + "label": "Duration" + }, + { + "format": "short" + } + ] + }, + { + "id": 6, + "title": "HTTP Requests by Status", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}, + "targets": [ + { + "expr": "rate(http_requests_total[5m])", + "legendFormat": "{{method}} {{endpoint}} {{status}}", + "refId": "A" + } + ], + "yaxes": [ + { + "format": "reqps", + "label": "Requests/sec" + }, + { + "format": "short" + } + ] + }, + { + "id": 7, + "title": "Job Success Rate", + "type": "stat", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 24}, + "targets": [ + { + "expr": "rate(jobs_total{status=\"completed\"}[5m]) / rate(jobs_total[5m]) * 100", + "legendFormat": "Success Rate", + "refId": "A" + } + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "orientation": "auto" + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + } + } + }, + { + "id": 8, + "title": "Job Failure Rate", + "type": "stat", + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 24}, + "targets": [ + { + "expr": "rate(jobs_total{status=\"failed\"}[5m]) / rate(jobs_total[5m]) * 100", + "legendFormat": "Failure Rate", + "refId": "A" + } + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "orientation": "auto" + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + } + } + }, + { + "id": 9, + "title": "Service Uptime", + "type": "stat", + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 24}, + "targets": [ + { + "expr": "service_uptime_seconds", + "legendFormat": "Uptime", + "refId": "A" + } + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "orientation": "auto" + }, + "fieldConfig": { + "defaults": { + "unit": "s" + } + } + }, + { + "id": 10, + "title": "Service Restarts", + "type": "stat", + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 24}, + "targets": [ + { + "expr": "service_restart_count", + "legendFormat": "Restarts", + "refId": "A" + } + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "orientation": "auto" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + } + } + } + ] + } +}