chore(security): enhance environment configuration, CI workflows, and wallet daemon with security improvements
- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration - Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage - Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
This commit is contained in:
47
dev/cache/aitbc_cache/__init__.py
vendored
Normal file
47
dev/cache/aitbc_cache/__init__.py
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
"""
|
||||
AITBC Event-Driven Cache Package
|
||||
|
||||
Provides distributed caching with event-driven invalidation for GPU marketplace
|
||||
and other real-time data that needs immediate propagation across edge nodes.
|
||||
"""
|
||||
|
||||
from .event_driven_cache import (
|
||||
EventDrivenCacheManager,
|
||||
CacheEventType,
|
||||
CacheEvent,
|
||||
CacheConfig,
|
||||
cache_manager,
|
||||
cached_result
|
||||
)
|
||||
|
||||
from .gpu_marketplace_cache import (
|
||||
GPUMarketplaceCacheManager,
|
||||
GPUInfo,
|
||||
BookingInfo,
|
||||
MarketStats,
|
||||
init_marketplace_cache,
|
||||
get_marketplace_cache,
|
||||
marketplace_cache
|
||||
)
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "AITBC Team"
|
||||
|
||||
__all__ = [
|
||||
# Core event-driven caching
|
||||
"EventDrivenCacheManager",
|
||||
"CacheEventType",
|
||||
"CacheEvent",
|
||||
"CacheConfig",
|
||||
"cache_manager",
|
||||
"cached_result",
|
||||
|
||||
# GPU marketplace caching
|
||||
"GPUMarketplaceCacheManager",
|
||||
"GPUInfo",
|
||||
"BookingInfo",
|
||||
"MarketStats",
|
||||
"init_marketplace_cache",
|
||||
"get_marketplace_cache",
|
||||
"marketplace_cache"
|
||||
]
|
||||
343
dev/cache/aitbc_cache/config.py
vendored
Normal file
343
dev/cache/aitbc_cache/config.py
vendored
Normal file
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
Cache Configuration for AITBC Event-Driven Caching System
|
||||
|
||||
Configuration settings for Redis distributed caching with event-driven invalidation
|
||||
across global edge nodes for GPU marketplace and real-time data.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class RedisConfig:
    """Redis connection configuration"""
    host: str = "localhost"          # Redis server hostname or IP
    port: int = 6379                 # Redis server TCP port
    db: int = 0                      # logical database index
    password: Optional[str] = None   # AUTH password; None means no auth
    ssl: bool = False                # use TLS for the connection
    max_connections: int = 20        # connection-pool upper bound
    socket_timeout: int = 5          # seconds before a socket operation times out
    socket_connect_timeout: int = 5  # seconds allowed to establish a connection
    retry_on_timeout: bool = True    # retry the command when a timeout occurs
    health_check_interval: int = 30  # seconds between connection health probes
|
||||
|
||||
|
||||
@dataclass
class CacheConfig:
    """Cache behavior configuration"""
    l1_cache_size: int = 1000              # max entries held in the in-process L1 cache
    l1_ttl_multiplier: float = 0.5         # L1 cache TTL as fraction of L2 TTL
    event_queue_size: int = 10000          # max pending invalidation events
    event_processing_timeout: int = 30     # seconds allowed per event batch
    invalidation_batch_size: int = 100     # keys deleted per invalidation batch
    stats_retention_hours: int = 24        # how long to retain cache statistics
    health_check_interval: int = 60        # seconds between cache health checks
|
||||
|
||||
|
||||
@dataclass
class EdgeNodeConfig:
    """Edge node configuration"""
    node_id: Optional[str] = None            # unique node id; None means auto-generate
    region: str = "default"                  # geographic region label
    datacenter: str = "default"              # datacenter label
    rack: Optional[str] = None               # physical rack identifier, if known
    availability_zone: Optional[str] = None  # cloud availability zone, if applicable
    network_tier: str = "standard"  # standard, premium, edge
    cache_tier: str = "edge"  # edge, regional, global
|
||||
|
||||
|
||||
@dataclass
class EventDrivenCacheSettings:
    """Complete event-driven cache settings.

    Aggregates the three sub-configurations (Redis connection, cache
    behavior, edge-node placement) plus top-level feature flags,
    performance and security knobs.
    """
    redis: RedisConfig
    cache: CacheConfig
    edge_node: EdgeNodeConfig

    # Feature flags
    enable_l1_cache: bool = True
    enable_event_driven_invalidation: bool = True
    enable_compression: bool = True
    enable_metrics: bool = True
    enable_health_checks: bool = True

    # Performance settings
    connection_pool_size: int = 20
    max_event_queue_size: int = 10000
    event_processing_workers: int = 4
    cache_warmup_enabled: bool = True

    # Security settings
    enable_tls: bool = False
    require_auth: bool = False
    auth_token: Optional[str] = None
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str) -> bool:
    """Read a boolean env var; only the case-insensitive string "true" is truthy."""
    return os.getenv(name, default).lower() == "true"


def load_config_from_env() -> EventDrivenCacheSettings:
    """Load configuration from environment variables.

    Every setting falls back to its documented default when the
    corresponding variable is unset.  Numeric variables are parsed with
    int()/float(), so a malformed value raises ValueError at load time.

    Returns:
        EventDrivenCacheSettings assembled from the process environment.
    """
    # Redis configuration
    redis_config = RedisConfig(
        host=os.getenv("REDIS_HOST", "localhost"),
        port=int(os.getenv("REDIS_PORT", "6379")),
        db=int(os.getenv("REDIS_DB", "0")),
        password=os.getenv("REDIS_PASSWORD"),
        ssl=_env_bool("REDIS_SSL", "false"),
        max_connections=int(os.getenv("REDIS_MAX_CONNECTIONS", "20")),
        socket_timeout=int(os.getenv("REDIS_SOCKET_TIMEOUT", "5")),
        socket_connect_timeout=int(os.getenv("REDIS_SOCKET_CONNECT_TIMEOUT", "5")),
        retry_on_timeout=_env_bool("REDIS_RETRY_ON_TIMEOUT", "true"),
        health_check_interval=int(os.getenv("REDIS_HEALTH_CHECK_INTERVAL", "30"))
    )

    # Cache configuration
    cache_config = CacheConfig(
        l1_cache_size=int(os.getenv("CACHE_L1_SIZE", "1000")),
        l1_ttl_multiplier=float(os.getenv("CACHE_L1_TTL_MULTIPLIER", "0.5")),
        event_queue_size=int(os.getenv("CACHE_EVENT_QUEUE_SIZE", "10000")),
        event_processing_timeout=int(os.getenv("CACHE_EVENT_PROCESSING_TIMEOUT", "30")),
        invalidation_batch_size=int(os.getenv("CACHE_INVALIDATION_BATCH_SIZE", "100")),
        stats_retention_hours=int(os.getenv("CACHE_STATS_RETENTION_HOURS", "24")),
        health_check_interval=int(os.getenv("CACHE_HEALTH_CHECK_INTERVAL", "60"))
    )

    # Edge node configuration
    edge_node_config = EdgeNodeConfig(
        node_id=os.getenv("EDGE_NODE_ID"),
        region=os.getenv("EDGE_NODE_REGION", "default"),
        datacenter=os.getenv("EDGE_NODE_DATACENTER", "default"),
        rack=os.getenv("EDGE_NODE_RACK"),
        availability_zone=os.getenv("EDGE_NODE_AVAILABILITY_ZONE"),
        network_tier=os.getenv("EDGE_NODE_NETWORK_TIER", "standard"),
        cache_tier=os.getenv("EDGE_NODE_CACHE_TIER", "edge")
    )

    return EventDrivenCacheSettings(
        redis=redis_config,
        cache=cache_config,
        edge_node=edge_node_config,
        # Feature flags
        enable_l1_cache=_env_bool("CACHE_ENABLE_L1", "true"),
        enable_event_driven_invalidation=_env_bool("CACHE_ENABLE_EVENT_DRIVEN", "true"),
        enable_compression=_env_bool("CACHE_ENABLE_COMPRESSION", "true"),
        enable_metrics=_env_bool("CACHE_ENABLE_METRICS", "true"),
        enable_health_checks=_env_bool("CACHE_ENABLE_HEALTH_CHECKS", "true"),
        # Performance settings
        connection_pool_size=int(os.getenv("CACHE_CONNECTION_POOL_SIZE", "20")),
        max_event_queue_size=int(os.getenv("CACHE_MAX_EVENT_QUEUE_SIZE", "10000")),
        event_processing_workers=int(os.getenv("CACHE_EVENT_PROCESSING_WORKERS", "4")),
        cache_warmup_enabled=_env_bool("CACHE_WARMUP_ENABLED", "true"),
        # Security settings
        enable_tls=_env_bool("CACHE_ENABLE_TLS", "false"),
        require_auth=_env_bool("CACHE_REQUIRE_AUTH", "false"),
        auth_token=os.getenv("CACHE_AUTH_TOKEN")
    )
|
||||
|
||||
|
||||
def get_redis_url(config: "RedisConfig") -> str:
    """Construct a Redis connection URL from *config*.

    The password is percent-encoded before interpolation: previously a
    password containing '@', '/' or ':' produced a malformed URL.

    Returns:
        A "redis://" URL, or "rediss://" when config.ssl is set.
    """
    from urllib.parse import quote

    auth_part = ""
    if config.password:
        # safe="" so every reserved character is escaped
        auth_part = f":{quote(config.password, safe='')}@"

    ssl_part = "s" if config.ssl else ""

    return f"redis{ssl_part}://{auth_part}{config.host}:{config.port}/{config.db}"
|
||||
|
||||
|
||||
# Default configurations for different environments
|
||||
|
||||
def get_development_config() -> EventDrivenCacheSettings:
    """Development environment configuration.

    Local Redis without TLS, a separate DB index, a small L1 cache, and
    metrics/health checks disabled to reduce overhead.
    """
    return EventDrivenCacheSettings(
        redis=RedisConfig(
            host="localhost",
            port=6379,
            db=1,  # Use different DB for development
            ssl=False
        ),
        cache=CacheConfig(
            l1_cache_size=100,  # Smaller cache for development
            l1_ttl_multiplier=0.3,
            event_queue_size=1000
        ),
        edge_node=EdgeNodeConfig(
            node_id="dev_node",
            region="development"
        ),
        enable_metrics=False,  # Disable overhead in development
        enable_health_checks=False
    )
|
||||
|
||||
|
||||
def get_staging_config() -> EventDrivenCacheSettings:
    """Staging environment configuration.

    Internal Redis over TLS, mid-sized caches, metrics and health checks
    enabled to mirror production monitoring.
    """
    return EventDrivenCacheSettings(
        redis=RedisConfig(
            host="redis-staging.internal",
            port=6379,
            db=0,
            ssl=True
        ),
        cache=CacheConfig(
            l1_cache_size=500,
            l1_ttl_multiplier=0.4,
            event_queue_size=5000
        ),
        edge_node=EdgeNodeConfig(
            node_id=None,  # Auto-generate
            region="staging"
        ),
        enable_metrics=True,
        enable_health_checks=True
    )
|
||||
|
||||
|
||||
def get_production_config() -> EventDrivenCacheSettings:
    """Production environment configuration.

    Reads cluster host/port/password and edge-node placement from the
    environment; everything is hardened (TLS, auth) and sized for
    production load (larger pools, queues, and worker counts).
    """
    return EventDrivenCacheSettings(
        redis=RedisConfig(
            host=os.getenv("REDIS_CLUSTER_HOST", "redis-cluster.internal"),
            port=int(os.getenv("REDIS_CLUSTER_PORT", "6379")),
            db=0,
            password=os.getenv("REDIS_CLUSTER_PASSWORD"),
            ssl=True,
            max_connections=50,
            socket_timeout=10,
            socket_connect_timeout=10,
            health_check_interval=15
        ),
        cache=CacheConfig(
            l1_cache_size=2000,
            l1_ttl_multiplier=0.6,
            event_queue_size=20000,
            event_processing_timeout=60,
            invalidation_batch_size=200,
            health_check_interval=30
        ),
        edge_node=EdgeNodeConfig(
            node_id=None,  # Auto-generate from hostname/IP
            region=os.getenv("EDGE_NODE_REGION", "global"),
            # NOTE(review): os.getenv with no default returns None here, although
            # the field's declared default is "default" — confirm None is intended.
            datacenter=os.getenv("EDGE_NODE_DATACENTER"),
            availability_zone=os.getenv("EDGE_NODE_AZ"),
            network_tier="premium",
            cache_tier="edge"
        ),
        enable_l1_cache=True,
        enable_event_driven_invalidation=True,
        enable_compression=True,
        enable_metrics=True,
        enable_health_checks=True,
        connection_pool_size=50,
        max_event_queue_size=20000,
        event_processing_workers=8,
        cache_warmup_enabled=True,
        enable_tls=True,
        require_auth=True,
        auth_token=os.getenv("CACHE_AUTH_TOKEN")
    )
|
||||
|
||||
|
||||
def get_edge_node_config(region: str) -> EventDrivenCacheSettings:
    """Configuration for edge nodes in specific regions"""
    settings = get_production_config()

    # Pin this node's placement to the requested region at the edge tier.
    node = settings.edge_node
    node.region = region
    node.cache_tier = "edge"
    node.network_tier = "edge"

    # Edge hardware: shrink the L1 cache and run fewer event workers.
    settings.cache.l1_cache_size = 500
    settings.cache.l1_ttl_multiplier = 0.3
    settings.event_processing_workers = 2

    return settings
|
||||
|
||||
|
||||
def get_regional_cache_config(region: str) -> EventDrivenCacheSettings:
    """Configuration for regional cache nodes"""
    settings = get_production_config()

    # Place this node in the requested region at the regional tier.
    node = settings.edge_node
    node.region = region
    node.cache_tier = "regional"
    node.network_tier = "premium"

    # Regional nodes carry a larger L1 cache and more event workers.
    settings.cache.l1_cache_size = 5000
    settings.cache.l1_ttl_multiplier = 0.8
    settings.event_processing_workers = 6

    return settings
|
||||
|
||||
|
||||
# Configuration validation
|
||||
def validate_config(config: EventDrivenCacheSettings) -> bool:
    """Validate cache configuration.

    Collects every violation before failing so the error message lists
    all problems at once.

    Raises:
        ValueError: if any setting is outside its allowed range.
    """
    problems = []

    # Redis connection checks
    if not config.redis.host:
        problems.append("Redis host is required")
    if not 1 <= config.redis.port <= 65535:
        problems.append("Redis port must be between 1 and 65535")
    if not 0 <= config.redis.db <= 15:
        problems.append("Redis DB must be between 0 and 15")

    # Cache behavior checks
    if config.cache.l1_cache_size <= 0:
        problems.append("L1 cache size must be positive")
    if not 0.1 <= config.cache.l1_ttl_multiplier <= 1.0:
        problems.append("L1 TTL multiplier must be between 0.1 and 1.0")
    if config.cache.event_queue_size <= 0:
        problems.append("Event queue size must be positive")

    # Edge node checks
    if not config.edge_node.region:
        problems.append("Edge node region is required")
    if config.edge_node.cache_tier not in ("edge", "regional", "global"):
        problems.append("Cache tier must be one of: edge, regional, global")

    if problems:
        raise ValueError(f"Configuration validation failed: {', '.join(problems)}")

    return True
|
||||
|
||||
|
||||
# Environment-specific configuration loader
|
||||
def get_config_for_environment(env: Optional[str] = None) -> "EventDrivenCacheSettings":
    """Get configuration for a specific environment.

    Args:
        env: Environment name ("production", "staging", "development");
            matched case-insensitively.  When None, the ENVIRONMENT
            variable is consulted, defaulting to "development".

    Returns:
        The environment's preset settings; any unrecognized name falls
        back to loading every setting from environment variables.
    """
    # Normalise whichever source supplied the name.  Previously .lower()
    # applied only to the os.getenv() fallback, so an explicitly passed
    # env like "Production" silently fell through to the generic loader.
    env = (env or os.getenv("ENVIRONMENT", "development")).lower()

    if env == "production":
        return get_production_config()
    elif env == "staging":
        return get_staging_config()
    elif env == "development":
        return get_development_config()
    else:
        # Default to environment variables
        return load_config_from_env()
|
||||
587
dev/cache/aitbc_cache/event_driven_cache.py
vendored
Normal file
587
dev/cache/aitbc_cache/event_driven_cache.py
vendored
Normal file
@@ -0,0 +1,587 @@
|
||||
"""
|
||||
Event-Driven Redis Caching Strategy for Distributed Edge Nodes
|
||||
|
||||
Implements a distributed caching system with event-driven cache invalidation
|
||||
for GPU availability and pricing data that changes on booking/cancellation.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any, Set, Callable
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
from datetime import datetime, timedelta
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
import redis.asyncio as redis
|
||||
from redis.asyncio import ConnectionPool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CacheEventType(Enum):
    """Types of cache events"""
    GPU_AVAILABILITY_CHANGED = "gpu_availability_changed"  # GPU became (un)available
    PRICING_UPDATED = "pricing_updated"                    # per-GPU-type price changed
    BOOKING_CREATED = "booking_created"                    # new booking was placed
    BOOKING_CANCELLED = "booking_cancelled"                # existing booking cancelled
    PROVIDER_STATUS_CHANGED = "provider_status_changed"    # provider went up/down etc.
    MARKET_STATS_UPDATED = "market_stats_updated"          # aggregate stats recomputed
    ORDER_BOOK_UPDATED = "order_book_updated"              # order book changed
    MANUAL_INVALIDATION = "manual_invalidation"            # explicit operator/API call
|
||||
|
||||
|
||||
@dataclass
class CacheEvent:
    """Cache invalidation event"""
    event_type: CacheEventType      # what happened (see CacheEventType)
    resource_id: str                # identifier of the affected resource
    data: Dict[str, Any]            # event-specific payload
    timestamp: float                # UNIX time the event was created
    source_node: str                # node_id of the emitting node
    event_id: str                   # unique id (uuid4) for de-duplication/tracing
    affected_namespaces: List[str]  # cache namespaces that must be invalidated
|
||||
|
||||
|
||||
@dataclass
class CacheConfig:
    """Cache configuration for different data types"""
    namespace: str        # key prefix for this data type
    ttl_seconds: int      # L2 (Redis) TTL for entries
    event_driven: bool    # whether writes publish invalidation events
    critical_data: bool  # Data that needs immediate propagation
    max_memory_mb: int    # advisory memory budget for this namespace
|
||||
|
||||
|
||||
class EventDrivenCacheManager:
    """
    Event-driven cache manager for distributed edge nodes

    Features:
    - Redis pub/sub for real-time cache invalidation
    - Multi-tier caching (L1 memory + L2 Redis)
    - Event-driven updates for critical data
    - Automatic failover and recovery
    - Distributed cache coordination
    """

    def __init__(self,
                 redis_url: str = "redis://localhost:6379/0",
                 node_id: str = None,
                 edge_node_region: str = "default"):
        # Connection target and identity of this node; node_id defaults to a
        # random "edge_node_XXXXXXXX" so events from this node can be ignored.
        self.redis_url = redis_url
        self.node_id = node_id or f"edge_node_{uuid.uuid4().hex[:8]}"
        self.edge_node_region = edge_node_region

        # Redis connections (created lazily in connect())
        self.redis_client = None
        self.pubsub = None
        self.connection_pool = None

        # Event handling
        self.event_handlers: Dict[CacheEventType, List[Callable]] = {}
        self.event_queue = asyncio.Queue()
        self.is_running = False

        # Local L1 cache for critical data
        self.l1_cache: Dict[str, Dict] = {}
        self.l1_max_size = 1000

        # Cache configurations
        self.cache_configs = self._init_cache_configs()

        # Statistics
        self.stats = {
            'events_processed': 0,
            'cache_hits': 0,
            'cache_misses': 0,
            'invalidations': 0,
            'last_event_time': None
        }

    def _init_cache_configs(self) -> Dict[str, CacheConfig]:
        """Initialize cache configurations for different data types"""
        return {
            # GPU availability - changes frequently, needs immediate propagation
            'gpu_availability': CacheConfig(
                namespace='gpu_avail',
                ttl_seconds=30,  # Short TTL, but event-driven invalidation
                event_driven=True,
                critical_data=True,
                max_memory_mb=100
            ),

            # GPU pricing - changes on booking/cancellation
            'gpu_pricing': CacheConfig(
                namespace='gpu_pricing',
                ttl_seconds=60,  # Medium TTL with event-driven updates
                event_driven=True,
                critical_data=True,
                max_memory_mb=50
            ),

            # Order book - very dynamic
            'order_book': CacheConfig(
                namespace='order_book',
                ttl_seconds=5,  # Very short TTL
                event_driven=True,
                critical_data=True,
                max_memory_mb=200
            ),

            # Provider status - changes on provider state changes
            'provider_status': CacheConfig(
                namespace='provider_status',
                ttl_seconds=120,  # Longer TTL with event-driven updates
                event_driven=True,
                critical_data=False,
                max_memory_mb=50
            ),

            # Market statistics - computed periodically
            'market_stats': CacheConfig(
                namespace='market_stats',
                ttl_seconds=300,  # 5 minutes
                event_driven=True,
                critical_data=False,
                max_memory_mb=100
            ),

            # Historical data - static, longer TTL
            'historical_data': CacheConfig(
                namespace='historical',
                ttl_seconds=3600,  # 1 hour
                event_driven=False,
                critical_data=False,
                max_memory_mb=500
            )
        }

    async def connect(self):
        """Connect to Redis and setup pub/sub.

        Verifies the connection with PING, subscribes to the shared
        invalidation channel, and starts the two background loops.

        Raises:
            Exception: re-raised from any failed connection step.
        """
        try:
            # Create connection pool
            self.connection_pool = ConnectionPool.from_url(
                self.redis_url,
                decode_responses=True,
                max_connections=20
            )

            # Create Redis client
            self.redis_client = redis.Redis(connection_pool=self.connection_pool)

            # Test connection
            await self.redis_client.ping()

            # Setup pub/sub for cache invalidation events
            self.pubsub = self.redis_client.pubsub()
            await self.pubsub.subscribe('cache_invalidation_events')

            # Start event processing
            # NOTE(review): the created task handles are not stored; asyncio keeps
            # only weak references to tasks, so these loops could be garbage
            # collected mid-flight — consider retaining the handles on self.
            self.is_running = True
            asyncio.create_task(self._process_events())
            asyncio.create_task(self._listen_for_events())

            logger.info(f"Connected to Redis cache manager. Node ID: {self.node_id}")

        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            raise

    async def disconnect(self):
        """Disconnect from Redis and cleanup"""
        # Signal both background loops to exit before tearing down connections.
        self.is_running = False

        if self.pubsub:
            await self.pubsub.unsubscribe('cache_invalidation_events')
            await self.pubsub.close()

        if self.redis_client:
            await self.redis_client.close()

        if self.connection_pool:
            await self.connection_pool.disconnect()

        logger.info("Disconnected from Redis cache manager")

    def _generate_cache_key(self, namespace: str, params: Dict[str, Any]) -> str:
        """Generate deterministic cache key.

        Params are serialized with sorted keys so equal dicts always hash
        to the same "<namespace>:<sha256>" key.
        """
        param_str = json.dumps(params, sort_keys=True)
        param_hash = hashlib.sha256(param_str.encode()).hexdigest()
        return f"{namespace}:{param_hash}"

    async def get(self, cache_type: str, params: Dict[str, Any]) -> Optional[Any]:
        """Get data from cache with L1/L2 fallback.

        Args:
            cache_type: one of the keys in self.cache_configs.
            params: lookup parameters; hashed into the cache key.

        Returns:
            The cached value, or None on a miss.

        Raises:
            ValueError: if cache_type is unknown.
        """
        config = self.cache_configs.get(cache_type)
        if not config:
            raise ValueError(f"Unknown cache type: {cache_type}")

        cache_key = self._generate_cache_key(config.namespace, params)

        # 1. Try L1 memory cache first (fastest)
        if cache_key in self.l1_cache:
            cache_entry = self.l1_cache[cache_key]
            if cache_entry['expires_at'] > time.time():
                self.stats['cache_hits'] += 1
                logger.debug(f"L1 cache hit for {cache_key}")
                return cache_entry['data']
            else:
                # Expired, remove from L1
                del self.l1_cache[cache_key]

        # 2. Try L2 Redis cache
        if self.redis_client:
            try:
                cached_data = await self.redis_client.get(cache_key)
                if cached_data:
                    self.stats['cache_hits'] += 1
                    logger.debug(f"L2 cache hit for {cache_key}")

                    data = json.loads(cached_data)

                    # Backfill L1 cache for critical data
                    # (capped TTL of 60s so L1 never outlives L2 for long)
                    if config.critical_data and len(self.l1_cache) < self.l1_max_size:
                        self.l1_cache[cache_key] = {
                            'data': data,
                            'expires_at': time.time() + min(config.ttl_seconds, 60)
                        }

                    return data
            except Exception as e:
                logger.warning(f"Redis get failed: {e}")

        self.stats['cache_misses'] += 1
        return None

    async def set(self, cache_type: str, params: Dict[str, Any], data: Any,
                  custom_ttl: int = None, publish_event: bool = True):
        """Set data in cache with optional event publishing.

        Writes to L1 (critical data only) and L2 (Redis), then publishes
        an invalidation event so peer nodes drop their stale copies.

        Raises:
            ValueError: if cache_type is unknown.
        """
        config = self.cache_configs.get(cache_type)
        if not config:
            raise ValueError(f"Unknown cache type: {cache_type}")

        cache_key = self._generate_cache_key(config.namespace, params)
        ttl = custom_ttl or config.ttl_seconds

        # 1. Set L1 cache for critical data
        if config.critical_data:
            self._update_l1_cache(cache_key, data, ttl)

        # 2. Set L2 Redis cache
        if self.redis_client:
            try:
                # default=str makes non-JSON types (datetime, UUID, ...) stringify
                serialized_data = json.dumps(data, default=str)
                await self.redis_client.setex(cache_key, ttl, serialized_data)

                # Publish invalidation event if event-driven
                if publish_event and config.event_driven:
                    await self._publish_invalidation_event(
                        CacheEventType.MANUAL_INVALIDATION,
                        cache_type,
                        {'cache_key': cache_key, 'action': 'updated'},
                        [config.namespace]
                    )

            except Exception as e:
                logger.error(f"Redis set failed: {e}")

    def _update_l1_cache(self, cache_key: str, data: Any, ttl: int):
        """Update L1 cache with size management"""
        # Remove oldest entries if cache is full
        # (evicts the entry closest to expiry, not strictly LRU)
        while len(self.l1_cache) >= self.l1_max_size:
            oldest_key = min(self.l1_cache.keys(),
                             key=lambda k: self.l1_cache[k]['expires_at'])
            del self.l1_cache[oldest_key]

        self.l1_cache[cache_key] = {
            'data': data,
            'expires_at': time.time() + ttl
        }

    async def invalidate_cache(self, cache_type: str, resource_id: str = None,
                               reason: str = "manual"):
        """Invalidate cache entries and publish event.

        Drops matching keys from L1 and L2 and notifies peer nodes.

        Raises:
            ValueError: if cache_type is unknown.
        """
        config = self.cache_configs.get(cache_type)
        if not config:
            raise ValueError(f"Unknown cache type: {cache_type}")

        # Invalidate L1 cache
        # NOTE(review): keys are "<namespace>:<sha256(params)>", so a raw
        # resource_id substring will rarely appear in a hashed key — confirm
        # this filter ever matches when resource_id is provided.
        keys_to_remove = []
        for key in self.l1_cache:
            if key.startswith(config.namespace):
                if resource_id is None or resource_id in key:
                    keys_to_remove.append(key)

        for key in keys_to_remove:
            del self.l1_cache[key]

        # Invalidate L2 Redis cache
        if self.redis_client:
            try:
                pattern = f"{config.namespace}:*"
                if resource_id:
                    pattern = f"{config.namespace}:*{resource_id}*"

                # Non-blocking SCAN loop instead of KEYS to avoid stalling Redis
                cursor = 0
                while True:
                    cursor, keys = await self.redis_client.scan(
                        cursor=cursor, match=pattern, count=100
                    )
                    if keys:
                        await self.redis_client.delete(*keys)
                    if cursor == 0:
                        break

                self.stats['invalidations'] += 1

                # Publish invalidation event
                await self._publish_invalidation_event(
                    CacheEventType.MANUAL_INVALIDATION,
                    cache_type,
                    {'resource_id': resource_id, 'reason': reason},
                    [config.namespace]
                )

                logger.info(f"Invalidated {cache_type} cache: {reason}")

            except Exception as e:
                logger.error(f"Cache invalidation failed: {e}")

    async def _publish_invalidation_event(self, event_type: CacheEventType,
                                          resource_id: str, data: Dict[str, Any],
                                          affected_namespaces: List[str]):
        """Publish cache invalidation event to Redis pub/sub"""
        event = CacheEvent(
            event_type=event_type,
            resource_id=resource_id,
            data=data,
            timestamp=time.time(),
            source_node=self.node_id,
            event_id=str(uuid.uuid4()),
            affected_namespaces=affected_namespaces
        )

        try:
            # default=str serializes the CacheEventType enum member
            event_json = json.dumps(asdict(event), default=str)
            await self.redis_client.publish('cache_invalidation_events', event_json)
            logger.debug(f"Published invalidation event: {event_type.value}")
        except Exception as e:
            logger.error(f"Failed to publish event: {e}")

    async def _listen_for_events(self):
        """Listen for cache invalidation events from other nodes.

        Background loop; exits when is_running is cleared. get_message()
        returns None on the 1s timeout, which is silently skipped.
        """
        while self.is_running:
            try:
                message = await self.pubsub.get_message(timeout=1.0)
                if message and message['type'] == 'message':
                    await self._handle_invalidation_event(message['data'])
            except Exception as e:
                logger.error(f"Event listener error: {e}")
                await asyncio.sleep(1)

    async def _handle_invalidation_event(self, event_json: str):
        """Handle incoming cache invalidation event"""
        try:
            event_data = json.loads(event_json)

            # Ignore events from this node
            if event_data.get('source_node') == self.node_id:
                return

            # Queue event for processing
            await self.event_queue.put(event_data)

        except Exception as e:
            logger.error(f"Failed to handle invalidation event: {e}")

    async def _process_events(self):
        """Process queued invalidation events.

        Background loop; the 1s wait_for timeout lets it re-check
        is_running regularly instead of blocking forever on the queue.
        """
        while self.is_running:
            try:
                event_data = await asyncio.wait_for(
                    self.event_queue.get(), timeout=1.0
                )

                await self._process_invalidation_event(event_data)
                self.stats['events_processed'] += 1
                self.stats['last_event_time'] = time.time()

            except asyncio.TimeoutError:
                continue
            except Exception as e:
                logger.error(f"Event processing error: {e}")

    async def _process_invalidation_event(self, event_data: Dict[str, Any]):
        """Process a single invalidation event.

        Drops every L1 and L2 entry in each namespace the event names.
        """
        event_type = CacheEventType(event_data['event_type'])
        affected_namespaces = event_data['affected_namespaces']

        # Invalidate L1 cache entries
        for namespace in affected_namespaces:
            keys_to_remove = []
            for key in self.l1_cache:
                if key.startswith(namespace):
                    keys_to_remove.append(key)

            for key in keys_to_remove:
                del self.l1_cache[key]

        # Invalidate L2 cache entries
        if self.redis_client:
            try:
                for namespace in affected_namespaces:
                    pattern = f"{namespace}:*"
                    cursor = 0
                    while True:
                        cursor, keys = await self.redis_client.scan(
                            cursor=cursor, match=pattern, count=100
                        )
                        if keys:
                            await self.redis_client.delete(*keys)
                        if cursor == 0:
                            break

                logger.debug(f"Processed invalidation event: {event_type.value}")

            except Exception as e:
                logger.error(f"Failed to process invalidation event: {e}")

    # Event-specific methods for common operations

    async def notify_gpu_availability_change(self, gpu_id: str, new_status: str):
        """Notify about GPU availability change"""
        await self._publish_invalidation_event(
            CacheEventType.GPU_AVAILABILITY_CHANGED,
            f"gpu_{gpu_id}",
            {'gpu_id': gpu_id, 'status': new_status},
            ['gpu_avail']
        )

    async def notify_pricing_update(self, gpu_type: str, new_price: float):
        """Notify about GPU pricing update"""
        await self._publish_invalidation_event(
            CacheEventType.PRICING_UPDATED,
            f"price_{gpu_type}",
            {'gpu_type': gpu_type, 'price': new_price},
            ['gpu_pricing']
        )

    async def notify_booking_created(self, booking_id: str, gpu_id: str):
        """Notify about new booking creation"""
        # A booking affects availability, pricing, and the order book at once.
        await self._publish_invalidation_event(
            CacheEventType.BOOKING_CREATED,
            f"booking_{booking_id}",
            {'booking_id': booking_id, 'gpu_id': gpu_id},
            ['gpu_avail', 'gpu_pricing', 'order_book']
        )

    async def notify_booking_cancelled(self, booking_id: str, gpu_id: str):
        """Notify about booking cancellation"""
        await self._publish_invalidation_event(
            CacheEventType.BOOKING_CANCELLED,
            f"booking_{booking_id}",
            {'booking_id': booking_id, 'gpu_id': gpu_id},
            ['gpu_avail', 'gpu_pricing', 'order_book']
        )

    async def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache performance statistics"""
        stats = self.stats.copy()

        # Add L1 cache size
        stats['l1_cache_size'] = len(self.l1_cache)
        stats['l1_cache_max_size'] = self.l1_max_size

        # Add Redis info if available
        if self.redis_client:
            try:
                info = await self.redis_client.info('memory')
                stats['redis_memory_used_mb'] = info['used_memory'] / (1024 * 1024)
                stats['redis_connected_clients'] = info.get('connected_clients', 0)
            except Exception as e:
                logger.warning(f"Failed to get Redis info: {e}")

        return stats

    async def health_check(self) -> Dict[str, Any]:
        """Perform health check of the cache system.

        Returns a dict whose 'status' is "healthy", "degraded" (no Redis)
        or "unhealthy" (pub/sub down or an exception during the check).
        """
        health = {
            'status': 'healthy',
            'redis_connected': False,
            'pubsub_active': False,
            'event_queue_size': 0,
            'last_event_age': None
        }

        try:
            # Check Redis connection
            if self.redis_client:
                await self.redis_client.ping()
                health['redis_connected'] = True

            # Check pub/sub
            if self.pubsub and self.is_running:
                health['pubsub_active'] = True

            # Check event queue
            health['event_queue_size'] = self.event_queue.qsize()

            # Check last event time
            if self.stats['last_event_time']:
                health['last_event_age'] = time.time() - self.stats['last_event_time']

            # Overall status
            if not health['redis_connected']:
                health['status'] = 'degraded'
            if not health['pubsub_active']:
                health['status'] = 'unhealthy'

        except Exception as e:
            health['status'] = 'unhealthy'
            health['error'] = str(e)

        return health
|
||||
|
||||
|
||||
# Global cache manager instance, shared by the cached_result decorator and
# by any module importing `cache_manager` directly.
cache_manager = EventDrivenCacheManager()
# Decorator for automatic cache management
def cached_result(cache_type: str, ttl: int = None, key_params: List[str] = None):
    """
    Decorator that caches an async function's result via the global
    ``cache_manager``.

    Args:
        cache_type: Type of cache to use (selects TTL / invalidation policy).
        ttl: Custom TTL override for stored entries.
        key_params: Names of parameters to build the cache key from. When
            omitted, the full ``(args, kwargs)`` pair is used as the key.

    Returns:
        A decorator producing an async wrapper around the target coroutine.
    """
    import functools  # local import keeps this block self-contained

    def decorator(func):
        # Fix: preserve the wrapped coroutine's __name__/__doc__ (the original
        # wrapper reported itself as "wrapper", breaking introspection/logging).
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Build the cache key from the selected parameters.
            if key_params:
                cache_key_params = {}
                for i, param_name in enumerate(key_params):
                    if i < len(args):
                        cache_key_params[param_name] = args[i]
                    elif param_name in kwargs:
                        cache_key_params[param_name] = kwargs[param_name]
                    # NOTE(review): a name in key_params absent from both args
                    # and kwargs is silently skipped — confirm this is intended.
            else:
                cache_key_params = {'args': args, 'kwargs': kwargs}

            # Serve from cache when possible (None is treated as a miss).
            # Fix: renamed from `cached_result`, which shadowed the decorator.
            cached_value = await cache_manager.get(cache_type, cache_key_params)
            if cached_value is not None:
                return cached_value

            # Miss: execute the coroutine and cache its result.
            result = await func(*args, **kwargs)
            await cache_manager.set(cache_type, cache_key_params, result, ttl)

            return result
        return wrapper
    return decorator
498
dev/cache/aitbc_cache/gpu_marketplace_cache.py
vendored
Normal file
498
dev/cache/aitbc_cache/gpu_marketplace_cache.py
vendored
Normal file
@@ -0,0 +1,498 @@
|
||||
"""
|
||||
GPU Marketplace Cache Manager
|
||||
|
||||
Specialized cache manager for GPU marketplace data with event-driven invalidation
|
||||
for availability and pricing changes on booking/cancellation.
|
||||
"""
|
||||
|
||||
import asyncio
import json
import logging
import time
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple

from .event_driven_cache import (
    EventDrivenCacheManager,
    CacheEventType,
    cached_result
)
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
||||
|
||||
|
||||
@dataclass
class GPUInfo:
    """GPU listing as cached for the marketplace.

    Instances are serialized with dataclasses.asdict() for cache storage and
    rebuilt via GPUInfo(**data) on cache hits.
    """
    gpu_id: str                      # unique identifier of this GPU
    provider_id: str                 # provider that owns/operates the GPU
    gpu_type: str                    # hardware model/class (used as a pricing key)
    memory_gb: int                   # onboard memory in gigabytes
    cuda_cores: int                  # CUDA core count
    base_price_per_hour: float       # provider's base hourly rate
    current_price_per_hour: float    # effective rate after dynamic pricing
    availability_status: str  # 'available', 'busy', 'offline', 'maintenance'
    region: str                      # region key used for filtered cache views
    performance_score: float         # ranking score (higher is better)
    last_updated: datetime           # when this record was last refreshed
||||
|
||||
@dataclass
class BookingInfo:
    """A GPU rental booking as tracked by the marketplace cache."""
    booking_id: str        # unique identifier of the booking
    gpu_id: str            # GPU being booked
    user_id: str           # user who placed the booking
    start_time: datetime   # rental period start
    end_time: datetime     # rental period end
    status: str  # 'active', 'completed', 'cancelled'
    total_cost: float      # total price for the rental period
    created_at: datetime   # when the booking was created
||||
|
||||
@dataclass
class MarketStats:
    """Aggregate marketplace statistics derived from the cached GPU list."""
    total_gpus: int               # total GPUs known to the marketplace
    available_gpus: int           # GPUs with status 'available'
    busy_gpus: int                # GPUs with status 'busy'
    average_price_per_hour: float # mean hourly price across available GPUs
    total_bookings_24h: int       # bookings in the last 24h (from database)
    total_volume_24h: float       # booking volume in the last 24h (from database)
    utilization_rate: float       # busy_gpus / total_gpus (0.0 when empty)
    last_updated: datetime        # when these statistics were computed
||||
|
||||
class GPUMarketplaceCacheManager:
    """
    Specialized cache manager for GPU marketplace

    Features:
    - Real-time GPU availability tracking
    - Dynamic pricing with immediate propagation
    - Event-driven cache invalidation on booking changes
    - Regional cache optimization
    - Performance-based GPU ranking
    """

    def __init__(self, cache_manager: EventDrivenCacheManager):
        """Wrap an EventDrivenCacheManager with marketplace-specific logic."""
        self.cache = cache_manager
        # Regions / GPU types observed so far; drive the filtered cache views.
        self.regions = set()
        self.gpu_types = set()

        self._register_event_handlers()

    def _register_event_handlers(self):
        """Register handlers for cache invalidation events.

        NOTE(review): this *replaces* any handler lists already registered on
        the shared cache manager for these event types — confirm no other
        component registers handlers for the same events.
        """
        self.cache.event_handlers[CacheEventType.GPU_AVAILABILITY_CHANGED] = [
            self._handle_gpu_availability_change
        ]
        self.cache.event_handlers[CacheEventType.PRICING_UPDATED] = [
            self._handle_pricing_update
        ]
        self.cache.event_handlers[CacheEventType.BOOKING_CREATED] = [
            self._handle_booking_created
        ]
        self.cache.event_handlers[CacheEventType.BOOKING_CANCELLED] = [
            self._handle_booking_cancelled
        ]

    # GPU Availability Methods

    async def get_gpu_availability(self,
                                   region: str = None,
                                   gpu_type: str = None,
                                   include_busy: bool = False) -> List[GPUInfo]:
        """Get GPU availability with filtering options.

        Fix: the cache key no longer embeds the current timestamp (which made
        every lookup a guaranteed miss) and omits unset filters so the key
        matches exactly what set_gpu_availability() writes ({} for the
        unfiltered view, {'region': r} / {'gpu_type': t} for filtered views).
        """
        params = {}
        if region is not None:
            params['region'] = region
        if gpu_type is not None:
            params['gpu_type'] = gpu_type
        # NOTE(review): include_busy is accepted but not applied as a filter;
        # the cached list is returned as stored — confirm intended behavior.

        cached_data = await self.cache.get('gpu_availability', params)
        if cached_data is not None:
            return [GPUInfo(**gpu) for gpu in cached_data]

        # In real implementation, this would query the database.
        # For now, return empty list to be populated by real data.
        return []

    async def set_gpu_availability(self, gpus: List[GPUInfo]):
        """Cache the full GPU list plus per-region and per-type views."""
        gpu_data = [asdict(gpu) for gpu in gpus]

        # Track regions and GPU types seen so far.
        for gpu in gpus:
            self.regions.add(gpu.region)
            self.gpu_types.add(gpu.gpu_type)

        # Unfiltered view; key {} matches get_gpu_availability() without filters.
        await self.cache.set('gpu_availability', {}, gpu_data)

        # Filtered views keyed the same way get_gpu_availability() looks them up.
        for region in self.regions:
            region_gpus = [asdict(gpu) for gpu in gpus if gpu.region == region]
            await self.cache.set('gpu_availability',
                                 {'region': region}, region_gpus)

        for gpu_type in self.gpu_types:
            type_gpus = [asdict(gpu) for gpu in gpus if gpu.gpu_type == gpu_type]
            await self.cache.set('gpu_availability',
                                 {'gpu_type': gpu_type}, type_gpus)

    async def update_gpu_status(self, gpu_id: str, new_status: str):
        """Update one GPU's status, refresh caches, and broadcast the change."""
        gpus = await self.get_gpu_availability()
        updated_gpu = None

        for gpu in gpus:
            if gpu.gpu_id == gpu_id:
                gpu.availability_status = new_status
                gpu.last_updated = datetime.utcnow()
                updated_gpu = gpu
                break

        if updated_gpu:
            # Re-cache the updated list, then publish for edge propagation.
            await self.set_gpu_availability(gpus)
            await self.cache.notify_gpu_availability_change(gpu_id, new_status)
            logger.info(f"Updated GPU {gpu_id} status to {new_status}")

    # Pricing Methods

    async def get_gpu_pricing(self,
                              gpu_type: str = None,
                              region: str = None) -> Dict[str, float]:
        """Get current GPU pricing.

        Fix: cache key no longer includes a timestamp and now exactly matches
        the key used by update_gpu_pricing()'s set() call.
        """
        params = {'gpu_type': gpu_type, 'region': region}

        cached_data = await self.cache.get('gpu_pricing', params)
        if cached_data is not None:
            return cached_data

        # Return empty pricing to be populated by real data.
        return {}

    async def update_gpu_pricing(self, gpu_type: str, new_price: float, region: str = None):
        """Update pricing for a GPU type (optionally region-scoped) and notify."""
        current_pricing = await self.get_gpu_pricing(gpu_type, region)

        # Pricing map key: "<type>_<region>" when region-scoped, else "<type>".
        pricing_key = f"{gpu_type}_{region}" if region else gpu_type
        current_pricing[pricing_key] = new_price

        await self.cache.set('gpu_pricing',
                             {'gpu_type': gpu_type, 'region': region},
                             current_pricing)

        # Publish event for immediate propagation.
        await self.cache.notify_pricing_update(gpu_type, new_price)

        logger.info(f"Updated {gpu_type} pricing to {new_price}")

    async def get_dynamic_pricing(self, gpu_id: str) -> float:
        """Return the demand-adjusted hourly price for a specific GPU.

        Returns 0.0 for an unknown GPU. Fix: the cached entry is stored as
        {'price': value}; the original returned the whole dict here despite
        the declared float return type.
        """
        params = {'gpu_id': gpu_id}

        cached_entry = await self.cache.get('gpu_pricing', params)
        if cached_entry is not None:
            if isinstance(cached_entry, dict):
                return float(cached_entry.get('price', 0.0))
            return float(cached_entry)

        # Calculate dynamic pricing based on demand and availability.
        gpus = await self.get_gpu_availability()
        target_gpu = next((gpu for gpu in gpus if gpu.gpu_id == gpu_id), None)

        if not target_gpu:
            return 0.0

        # Scarcity multiplier: the fewer GPUs available, the higher the price.
        base_price = target_gpu.base_price_per_hour
        availability_multiplier = 1.0

        total_gpus = len(gpus)
        available_gpus = len([g for g in gpus if g.availability_status == 'available'])

        if total_gpus > 0:
            availability_ratio = available_gpus / total_gpus
            if availability_ratio < 0.1:  # Less than 10% available
                availability_multiplier = 2.0
            elif availability_ratio < 0.3:  # Less than 30% available
                availability_multiplier = 1.5
            elif availability_ratio < 0.5:  # Less than 50% available
                availability_multiplier = 1.2

        dynamic_price = base_price * availability_multiplier

        # Cache the calculated price.
        await self.cache.set('gpu_pricing', params, {'price': dynamic_price})

        return dynamic_price

    # Booking Methods

    async def create_booking(self, booking: BookingInfo) -> bool:
        """Create a booking, mark the GPU busy, reprice, and invalidate caches.

        Returns True on success, False if any step raised.
        """
        try:
            # In real implementation, save to database first.
            # For now, just update caches.

            # Mark the GPU busy.
            await self.update_gpu_status(booking.gpu_id, 'busy')

            # Reprice: reduced availability may raise the dynamic price.
            gpus = await self.get_gpu_availability()
            target_gpu = next((gpu for gpu in gpus if gpu.gpu_id == booking.gpu_id), None)

            if target_gpu:
                new_price = await self.get_dynamic_pricing(booking.gpu_id)
                await self.update_gpu_pricing(target_gpu.gpu_type, new_price, target_gpu.region)

            # Publish booking creation event.
            await self.cache.notify_booking_created(booking.booking_id, booking.gpu_id)

            # Invalidate caches derived from availability/pricing.
            await self.cache.invalidate_cache('order_book')
            await self.cache.invalidate_cache('market_stats')

            logger.info(f"Created booking {booking.booking_id} for GPU {booking.gpu_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to create booking: {e}")
            return False

    async def cancel_booking(self, booking_id: str, gpu_id: str) -> bool:
        """Cancel a booking, free the GPU, reprice, and invalidate caches.

        Returns True on success, False if any step raised.
        """
        try:
            # Free the GPU.
            await self.update_gpu_status(gpu_id, 'available')

            # Reprice: increased availability may lower the dynamic price.
            gpus = await self.get_gpu_availability()
            target_gpu = next((gpu for gpu in gpus if gpu.gpu_id == gpu_id), None)

            if target_gpu:
                new_price = await self.get_dynamic_pricing(gpu_id)
                await self.update_gpu_pricing(target_gpu.gpu_type, new_price, target_gpu.region)

            # Publish booking cancellation event.
            await self.cache.notify_booking_cancelled(booking_id, gpu_id)

            # Invalidate caches derived from availability/pricing.
            await self.cache.invalidate_cache('order_book')
            await self.cache.invalidate_cache('market_stats')

            logger.info(f"Cancelled booking {booking_id} for GPU {gpu_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to cancel booking: {e}")
            return False

    # Market Statistics

    async def get_market_stats(self) -> MarketStats:
        """Return marketplace statistics, computed from the cached GPU list.

        Fix: uses a stable (empty) cache key instead of a per-call timestamp,
        so cached statistics are actually reused until invalidated.
        """
        params = {}

        cached_data = await self.cache.get('market_stats', params)
        if cached_data is not None:
            return MarketStats(**cached_data)

        # Calculate statistics from current data.
        gpus = await self.get_gpu_availability()

        total_gpus = len(gpus)
        available_gpus = len([g for g in gpus if g.availability_status == 'available'])
        busy_gpus = len([g for g in gpus if g.availability_status == 'busy'])

        # Average over available GPUs only; 0.0 when none are available.
        prices = [g.current_price_per_hour for g in gpus if g.availability_status == 'available']
        avg_price = sum(prices) / len(prices) if prices else 0.0

        utilization_rate = busy_gpus / total_gpus if total_gpus > 0 else 0.0

        stats = MarketStats(
            total_gpus=total_gpus,
            available_gpus=available_gpus,
            busy_gpus=busy_gpus,
            average_price_per_hour=avg_price,
            total_bookings_24h=0,  # Would be calculated from database
            total_volume_24h=0.0,  # Would be calculated from database
            utilization_rate=utilization_rate,
            last_updated=datetime.utcnow()
        )

        # Cache the statistics.
        await self.cache.set('market_stats', params, asdict(stats))

        return stats

    # Event Handlers

    async def _handle_gpu_availability_change(self, event_data: Dict[str, Any]):
        """Handle GPU availability change event: drop derived caches."""
        gpu_id = event_data['data']['gpu_id']
        new_status = event_data['data']['status']

        await self.cache.invalidate_cache('gpu_availability')
        await self.cache.invalidate_cache('market_stats')

        logger.debug(f"Handled GPU availability change: {gpu_id} -> {new_status}")

    async def _handle_pricing_update(self, event_data: Dict[str, Any]):
        """Handle pricing update event: drop pricing and stats caches."""
        gpu_type = event_data['data']['gpu_type']
        new_price = event_data['data']['price']

        await self.cache.invalidate_cache('gpu_pricing')
        await self.cache.invalidate_cache('market_stats')

        logger.debug(f"Handled pricing update: {gpu_type} -> {new_price}")

    async def _handle_booking_created(self, event_data: Dict[str, Any]):
        """Handle booking creation event: drop every booking-affected cache."""
        booking_id = event_data['data']['booking_id']
        gpu_id = event_data['data']['gpu_id']

        await self.cache.invalidate_cache('gpu_availability')
        await self.cache.invalidate_cache('gpu_pricing')
        await self.cache.invalidate_cache('order_book')
        await self.cache.invalidate_cache('market_stats')

        logger.debug(f"Handled booking creation: {booking_id}")

    async def _handle_booking_cancelled(self, event_data: Dict[str, Any]):
        """Handle booking cancellation event: drop every booking-affected cache."""
        booking_id = event_data['data']['booking_id']
        gpu_id = event_data['data']['gpu_id']

        await self.cache.invalidate_cache('gpu_availability')
        await self.cache.invalidate_cache('gpu_pricing')
        await self.cache.invalidate_cache('order_book')
        await self.cache.invalidate_cache('market_stats')

        logger.debug(f"Handled booking cancellation: {booking_id}")

    # Utility Methods

    async def get_top_performing_gpus(self, limit: int = 10) -> List[GPUInfo]:
        """Return up to `limit` available GPUs, best performance score first."""
        gpus = await self.get_gpu_availability()

        available_gpus = [gpu for gpu in gpus if gpu.availability_status == 'available']
        sorted_gpus = sorted(available_gpus,
                             key=lambda gpu: gpu.performance_score,
                             reverse=True)

        return sorted_gpus[:limit]

    async def get_cheapest_gpus(self, limit: int = 10, gpu_type: str = None) -> List[GPUInfo]:
        """Return up to `limit` available GPUs, cheapest first."""
        gpus = await self.get_gpu_availability(gpu_type=gpu_type)

        available_gpus = [gpu for gpu in gpus if gpu.availability_status == 'available']
        sorted_gpus = sorted(available_gpus,
                             key=lambda gpu: gpu.current_price_per_hour)

        return sorted_gpus[:limit]

    async def search_gpus(self,
                          min_memory: int = None,
                          min_cuda_cores: int = None,
                          max_price: float = None,
                          region: str = None) -> List[GPUInfo]:
        """Search available GPUs matching all of the given criteria.

        Fix: filters are compared against None explicitly, so legitimate
        zero values (e.g. max_price=0.0) are no longer silently ignored.
        """
        gpus = await self.get_gpu_availability(region=region)

        filtered_gpus = []
        for gpu in gpus:
            if gpu.availability_status != 'available':
                continue

            if min_memory is not None and gpu.memory_gb < min_memory:
                continue

            if min_cuda_cores is not None and gpu.cuda_cores < min_cuda_cores:
                continue

            if max_price is not None and gpu.current_price_per_hour > max_price:
                continue

            filtered_gpus.append(gpu)

        return filtered_gpus

    async def get_cache_health(self) -> Dict[str, Any]:
        """Return the base cache health report plus marketplace metrics."""
        health = await self.cache.health_check()

        marketplace_metrics = {
            'regions_count': len(self.regions),
            'gpu_types_count': len(self.gpu_types),
            'last_gpu_update': None,
            'last_pricing_update': None
        }

        # Fix: use .get() so a stats dict without 'last_event_time' does not
        # raise KeyError.
        stats = await self.cache.get_cache_stats()
        last_event_time = stats.get('last_event_time')
        if last_event_time:
            marketplace_metrics['last_event_age'] = time.time() - last_event_time

        health['marketplace_metrics'] = marketplace_metrics
        health['cache_stats'] = stats

        return health
||||
|
||||
# Global marketplace cache manager instance; populated by
# init_marketplace_cache() and read back by get_marketplace_cache().
marketplace_cache = None
||||
|
||||
|
||||
async def init_marketplace_cache(redis_url: str = "redis://localhost:6379/0",
                                 node_id: str = None,
                                 region: str = "default") -> GPUMarketplaceCacheManager:
    """Create and install the module-global marketplace cache manager.

    Connects an EventDrivenCacheManager to Redis, wraps it in a
    GPUMarketplaceCacheManager, stores that in the module global
    ``marketplace_cache``, and returns it.
    """
    global marketplace_cache

    # Bring up the backing event-driven cache first.
    backing_cache = EventDrivenCacheManager(redis_url, node_id, region)
    await backing_cache.connect()

    # Wrap it with the marketplace-specific layer and publish globally.
    marketplace_cache = GPUMarketplaceCacheManager(backing_cache)

    logger.info("GPU Marketplace Cache Manager initialized")
    return marketplace_cache
||||
|
||||
async def get_marketplace_cache() -> GPUMarketplaceCacheManager:
    """Return the module-global marketplace cache manager.

    Raises:
        RuntimeError: if init_marketplace_cache() has not been called yet.
    """
    cache = marketplace_cache
    if cache is None:
        raise RuntimeError("Marketplace cache not initialized. Call init_marketplace_cache() first.")
    return cache
Reference in New Issue
Block a user