"""
Metrics collection for AITBC Enterprise Connectors
"""

import asyncio
import json
import logging
import time
from collections import defaultdict, deque
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List

from .core import ConnectorConfig

# Maximum number of samples retained per histogram / timer series.
_MAX_SAMPLES = 1000
# Seconds between periodic flushes performed by the background loop.
_FLUSH_INTERVAL_SECONDS = 60
# Total timeout, in seconds, for a single flush HTTP request.
_FLUSH_TIMEOUT_SECONDS = 10


@dataclass
class MetricPoint:
    """Single metric data point"""

    name: str
    value: float
    timestamp: datetime
    tags: Optional[Dict[str, str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable representation of this point.

        The timestamp is rendered with ``isoformat()`` and missing tags
        become an empty dict.
        """
        return {
            "name": self.name,
            "value": self.value,
            "timestamp": self.timestamp.isoformat(),
            "tags": self.tags or {},
        }


class MetricsCollector:
    """Collects and manages metrics for connectors"""

    def __init__(self, config: ConnectorConfig):
        """Create a collector.

        Args:
            config: Connector configuration; ``config.metrics_endpoint`` is
                read to decide whether (and where) buffered points are sent.
        """
        self.config = config
        self.logger = logging.getLogger(f"aitbc.{self.__class__.__name__}")

        # Metric storage
        self._counters: Dict[str, float] = defaultdict(float)
        self._gauges: Dict[str, float] = {}
        self._histograms: Dict[str, deque] = defaultdict(
            lambda: deque(maxlen=_MAX_SAMPLES)
        )
        # Bounded like the histograms: only the most recent timings are kept.
        self._timers: Dict[str, deque] = defaultdict(
            lambda: deque(maxlen=_MAX_SAMPLES)
        )

        # Runtime state
        self._running = False
        self._flush_task = None
        # Flush scheduled because the buffer filled up; a reference is kept so
        # the task is not garbage collected and only one is in flight.
        self._pending_flush = None
        self._buffer: List[MetricPoint] = []
        self._buffer_size = 1000
        # Hard cap so the buffer cannot grow without bound when no endpoint is
        # configured or the endpoint keeps failing.
        self._max_buffer_size = self._buffer_size * 10
        self._dropped_points = 0

        # Aggregated metrics
        self._request_count = 0
        self._error_count = 0
        self._total_duration = 0.0
        self._last_flush = None

    async def start(self):
        """Start metrics collection.

        Does nothing when already running. The periodic flush task is only
        started when a metrics endpoint is configured.
        """
        if self._running:
            return

        self._running = True
        # NOTE: naive UTC timestamps are kept so emitted ISO strings stay in
        # the same format as before.
        self._last_flush = datetime.utcnow()

        # Start periodic flush task
        if self.config.metrics_endpoint:
            self._flush_task = asyncio.create_task(self._flush_loop())

        self.logger.info("Metrics collection started")

    async def stop(self):
        """Stop metrics collection and perform a final flush.

        Does nothing when the collector is not running.
        """
        if not self._running:
            return

        self._running = False

        # Cancel flush task
        if self._flush_task:
            self._flush_task.cancel()
            try:
                await self._flush_task
            except asyncio.CancelledError:
                pass
            self._flush_task = None

        # Let a buffer-triggered flush finish so its batch is not abandoned.
        pending = self._pending_flush
        if pending is not None and not pending.done():
            await asyncio.gather(pending, return_exceptions=True)
        self._pending_flush = None

        # Final flush
        await self._flush_metrics()

        self.logger.info("Metrics collection stopped")

    def increment(
        self, name: str, value: float = 1.0, tags: Optional[Dict[str, str]] = None
    ):
        """Increment counter metric.

        Args:
            name: Metric name.
            value: Amount added to the counter.
            tags: Optional tags; part of the series key.
        """
        key = self._make_key(name, tags)
        self._counters[key] += value

        # Add to buffer
        self._add_to_buffer(name, value, tags)

    def gauge(self, name: str, value: float, tags: Optional[Dict[str, str]] = None):
        """Set gauge metric.

        Args:
            name: Metric name.
            value: New gauge value (replaces the previous one).
            tags: Optional tags; part of the series key.
        """
        key = self._make_key(name, tags)
        self._gauges[key] = value

        # Add to buffer
        self._add_to_buffer(name, value, tags)

    def histogram(
        self, name: str, value: float, tags: Optional[Dict[str, str]] = None
    ):
        """Add value to histogram.

        Only the most recent samples per series are retained.

        Args:
            name: Metric name.
            value: Observed sample.
            tags: Optional tags; part of the series key.
        """
        key = self._make_key(name, tags)
        self._histograms[key].append(value)

        # Add to buffer
        self._add_to_buffer(name, value, tags)

    def timer(self, name: str, duration: float, tags: Optional[Dict[str, str]] = None):
        """Record timing metric.

        Only the most recent timings per series are retained. The buffered
        point is named ``f"{name}_duration"``.

        Args:
            name: Metric name.
            duration: Measured duration.
            tags: Optional tags; part of the series key.
        """
        key = self._make_key(name, tags)
        # The deque's maxlen discards the oldest timing automatically.
        self._timers[key].append(duration)

        # Add to buffer
        self._add_to_buffer(f"{name}_duration", duration, tags)

    async def record_request(
        self,
        method: str,
        path: str,
        status: int,
        duration: float
    ):
        """Record request metrics.

        Updates the aggregate counters and records ``requests_total``,
        ``request_duration`` and, for statuses >= 400, ``errors_total``.

        Args:
            method: Request method, used as a tag.
            path: Request path, used as a tag.
            status: Response status code; values >= 400 count as errors.
            duration: Request duration.
        """
        is_error = status >= 400

        # Update aggregated metrics
        self._request_count += 1
        self._total_duration += duration
        if is_error:
            self._error_count += 1

        # Record detailed metrics
        tags = {
            "method": method,
            "path": path,
            "status": str(status),
        }

        self.increment("requests_total", 1.0, tags)
        self.timer("request_duration", duration, tags)

        if is_error:
            self.increment("errors_total", 1.0, tags)

    def get_metric(
        self, name: str, tags: Optional[Dict[str, str]] = None
    ) -> Optional[float]:
        """Get current metric value.

        Lookup order is counters, gauges, histograms, timers. Histograms and
        timers return the mean of their retained samples.

        Returns:
            The current value, or ``None`` when no series matches.
        """
        key = self._make_key(name, tags)

        if key in self._counters:
            return self._counters[key]
        if key in self._gauges:
            return self._gauges[key]

        for series in (self._histograms, self._timers):
            if key in series:
                samples = series[key]
                return sum(samples) / len(samples) if samples else 0

        return None

    def get_summary(self) -> Dict[str, Any]:
        """Get metrics summary.

        Returns:
            Dict with request/error totals, error rate, average duration,
            the last flush time (ISO string or ``None``) and the number of
            distinct series currently stored.
        """
        # Avoid division by zero before any request has been recorded.
        denominator = max(self._request_count, 1)
        series_count = (
            len(self._counters)
            + len(self._gauges)
            + len(self._histograms)
            + len(self._timers)
        )
        return {
            "requests_total": self._request_count,
            "errors_total": self._error_count,
            "error_rate": self._error_count / denominator,
            "avg_duration": self._total_duration / denominator,
            "last_flush": self._last_flush.isoformat() if self._last_flush else None,
            "metrics_count": series_count,
        }

    def _make_key(self, name: str, tags: Optional[Dict[str, str]] = None) -> str:
        """Create metric key with tags.

        Tags are sorted so the same tag set always yields the same key.
        """
        if not tags:
            return name

        tag_str = ",".join(f"{k}={v}" for k, v in sorted(tags.items()))
        return f"{name}[{tag_str}]"

    def _add_to_buffer(
        self, name: str, value: float, tags: Optional[Dict[str, str]] = None
    ):
        """Add metric point to buffer and schedule a flush once it is full."""
        self._buffer.append(
            MetricPoint(
                name=name,
                value=value,
                timestamp=datetime.utcnow(),
                tags=tags,
            )
        )

        if len(self._buffer) < self._buffer_size:
            return

        # Flush if buffer is full
        if self.config.metrics_endpoint:
            self._schedule_flush()
        self._trim_buffer()

    def _schedule_flush(self):
        """Start a background flush unless one is already in flight."""
        pending = self._pending_flush
        if pending is not None and not pending.done():
            return

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # Called from synchronous code: nothing can be scheduled, so the
            # points stay buffered until the next flush.
            return

        self._pending_flush = loop.create_task(self._flush_metrics())

    def _trim_buffer(self):
        """Drop the oldest points when the buffer exceeds its hard cap."""
        overflow = len(self._buffer) - self._max_buffer_size
        if overflow <= 0:
            return

        if self._dropped_points == 0:
            # Warn only once to avoid flooding the log on every new point.
            self.logger.warning("Metrics buffer full; dropping oldest points")
        self._dropped_points += overflow
        del self._buffer[:overflow]

    async def _flush_loop(self):
        """Periodic flush loop"""
        while self._running:
            try:
                await asyncio.sleep(_FLUSH_INTERVAL_SECONDS)  # Flush every minute
                await self._flush_metrics()
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.error(f"Flush loop error: {e}")

    async def _flush_metrics(self):
        """Flush metrics to endpoint.

        Does nothing when no endpoint is configured or the buffer is empty.
        Errors are logged, never raised; points that could not be delivered
        are put back in the buffer for the next attempt.
        """
        endpoint = self.config.metrics_endpoint
        if not endpoint or not self._buffer:
            return

        # Take ownership of the current batch so points recorded while the
        # request is in flight are neither sent twice nor discarded.
        batch, self._buffer = self._buffer, []
        delivered = False

        try:
            import aiohttp

            # Prepare metrics payload; to_dict() is used because the raw
            # datetime produced by asdict() is not JSON serializable.
            payload = {
                "timestamp": datetime.utcnow().isoformat(),
                "source": "aitbc-enterprise-sdk",
                "metrics": [point.to_dict() for point in batch],
            }
            timeout = aiohttp.ClientTimeout(total=_FLUSH_TIMEOUT_SECONDS)

            # Send to endpoint
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    endpoint,
                    json=payload,
                    timeout=timeout,
                ) as response:
                    # Any 2xx response means the endpoint accepted the batch.
                    if 200 <= response.status < 300:
                        delivered = True
                        self._last_flush = datetime.utcnow()
                        self.logger.debug(f"Flushed {len(batch)} metrics")
                    else:
                        self.logger.error(
                            f"Failed to flush metrics: {response.status}"
                        )

        except Exception as e:
            self.logger.error(f"Error flushing metrics: {e}")
        finally:
            # Also runs on cancellation, so an interrupted batch is retried.
            if not delivered:
                self._buffer[:0] = batch
                self._trim_buffer()


class PerformanceTracker:
    """Track performance metrics for operations"""

    def __init__(self, metrics: MetricsCollector):
        """Create a tracker that reports to ``metrics``."""
        self.metrics = metrics
        # Operation name -> perf_counter() reading taken at start.
        self._operations: Dict[str, float] = {}

    def start_operation(self, operation: str):
        """Start timing an operation.

        Starting an operation that is already being timed restarts it.
        """
        # perf_counter is monotonic, unlike wall-clock time.time().
        self._operations[operation] = time.perf_counter()

    def end_operation(
        self, operation: str, tags: Optional[Dict[str, str]] = None
    ) -> Optional[float]:
        """End timing an operation.

        Returns:
            The elapsed seconds, or ``None`` when ``operation`` was never
            started (or has already been ended).
        """
        started = self._operations.pop(operation, None)
        if started is None:
            return None

        duration = time.perf_counter() - started
        self.metrics.timer(f"operation_{operation}", duration, tags)
        return duration

    async def track_operation(
        self, operation: str, coro, tags: Optional[Dict[str, str]] = None
    ):
        """Await ``coro`` while recording its duration and outcome.

        A timer and an ``operations_total`` counter are recorded whether the
        awaitable succeeds, raises or is cancelled; the ``success`` tag is
        ``"True"`` only when it completed normally.

        Args:
            operation: Operation name, used in the metric name and tags.
            coro: Awaitable to run.
            tags: Extra tags; they override the built-in ones on conflict.

        Returns:
            Whatever ``coro`` returns. Exceptions from ``coro`` propagate.
        """
        started = time.perf_counter()
        # Set up front so the finally block is safe even when the await is
        # interrupted by a BaseException such as CancelledError.
        success = False

        try:
            result = await coro
            success = True
            return result
        finally:
            duration = time.perf_counter() - started

            metric_tags = {
                "operation": operation,
                "success": str(success),
                **(tags or {}),
            }

            self.metrics.timer(f"operation_{operation}", duration, metric_tags)
            self.metrics.increment("operations_total", 1.0, metric_tags)