Extract GPU domain models and services to gpu-service

- Created domain directory with gpu_marketplace.py containing GPU domain models:
  - GPUArchitecture enum
  - GPURegistry
  - ConsumerGPUProfile
  - EdgeGPUMetrics
  - GPUBooking
  - GPUReview
- Created data directory with consumer_gpu_profiles.py
- Created services directory with edge_gpu_service.py
- Created storage.py for database session management
- Updated main.py to include database initialization and GPU router endpoints:
  - GET /v1/marketplace/edge-gpu/profiles
  - GET /v1/marketplace/edge-gpu/metrics/{gpu_id}
  - POST /v1/marketplace/edge-gpu/scan/{miner_id}
  - POST /v1/marketplace/edge-gpu/optimize/inference/{gpu_id}

This completes Phase 4.3a-4.3b: Extract GPU domain models and services
This commit is contained in:
aitbc
2026-04-30 11:23:04 +02:00
parent 6f6b66cef5
commit d838c77a5f
6 changed files with 358 additions and 1 deletions

View File

@@ -0,0 +1,21 @@
"""
GPU Service domain models
"""
from .gpu_marketplace import (
GPUArchitecture,
GPURegistry,
ConsumerGPUProfile,
EdgeGPUMetrics,
GPUBooking,
GPUReview,
)
__all__ = [
"GPUArchitecture",
"GPURegistry",
"ConsumerGPUProfile",
"EdgeGPUMetrics",
"GPUBooking",
"GPUReview",
]

View File

@@ -0,0 +1,154 @@
"""Persistent SQLModel tables for the GPU marketplace."""
from __future__ import annotations
from datetime import datetime
from enum import StrEnum
from uuid import uuid4
from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel
class GPUArchitecture(StrEnum):
TURING = "turing" # RTX 20 series
AMPERE = "ampere" # RTX 30 series
ADA_LOVELACE = "ada_lovelace" # RTX 40 series
PASCAL = "pascal" # GTX 10 series
VOLTA = "volta" # Titan V, Tesla V100
UNKNOWN = "unknown"
class GPURegistry(SQLModel, table=True):
"""Registered GPUs available in the marketplace."""
__tablename__ = "gpu_registry"
__table_args__ = {"extend_existing": True}
id: str = Field(default_factory=lambda: f"gpu_{uuid4().hex[:8]}", primary_key=True)
miner_id: str = Field(index=True)
model: str = Field(index=True)
memory_gb: int = Field(default=0)
cuda_version: str = Field(default="")
region: str = Field(default="", index=True)
price_per_hour: float = Field(default=0.0)
status: str = Field(default="available", index=True) # available, booked, offline
capabilities: list = Field(default_factory=list, sa_column=Column(JSON, nullable=False))
average_rating: float = Field(default=0.0)
total_reviews: int = Field(default=0)
created_at: datetime = Field(default_factory=datetime.now(datetime.UTC), nullable=False, index=True)
class ConsumerGPUProfile(SQLModel, table=True):
"""Consumer GPU optimization profiles for edge computing"""
__tablename__ = "consumer_gpu_profiles"
__table_args__ = {"extend_existing": True}
id: str = Field(default_factory=lambda: f"cgp_{uuid4().hex[:8]}", primary_key=True)
gpu_model: str = Field(index=True)
architecture: GPUArchitecture = Field(default=GPUArchitecture.UNKNOWN)
consumer_grade: bool = Field(default=True)
edge_optimized: bool = Field(default=False)
# Hardware specifications
cuda_cores: int | None = Field(default=None)
memory_gb: int | None = Field(default=None)
memory_bandwidth_gbps: float | None = Field(default=None)
tensor_cores: int | None = Field(default=None)
base_clock_mhz: int | None = Field(default=None)
boost_clock_mhz: int | None = Field(default=None)
# Edge optimization metrics
power_consumption_w: float | None = Field(default=None)
thermal_design_power_w: float | None = Field(default=None)
noise_level_db: float | None = Field(default=None)
# Performance characteristics
fp32_tflops: float | None = Field(default=None)
fp16_tflops: float | None = Field(default=None)
int8_tops: float | None = Field(default=None)
# Edge-specific optimizations
low_latency_mode: bool = Field(default=False)
mobile_optimized: bool = Field(default=False)
thermal_throttling_resistance: float | None = Field(default=None)
# Compatibility flags
supported_cuda_versions: list = Field(default_factory=list, sa_column=Column(JSON, nullable=True))
supported_tensorrt_versions: list = Field(default_factory=list, sa_column=Column(JSON, nullable=True))
supported_ollama_models: list = Field(default_factory=list, sa_column=Column(JSON, nullable=True))
# Pricing and availability
market_price_usd: float | None = Field(default=None)
edge_premium_multiplier: float = Field(default=1.0)
availability_score: float = Field(default=1.0)
created_at: datetime = Field(default_factory=datetime.now(datetime.UTC))
updated_at: datetime = Field(default_factory=datetime.now(datetime.UTC))
class EdgeGPUMetrics(SQLModel, table=True):
"""Real-time edge GPU performance metrics"""
__tablename__ = "edge_gpu_metrics"
__table_args__ = {"extend_existing": True}
id: str = Field(default_factory=lambda: f"egm_{uuid4().hex[:8]}", primary_key=True)
gpu_id: str = Field(foreign_key="gpu_registry.id")
# Latency metrics
network_latency_ms: float = Field()
compute_latency_ms: float = Field()
total_latency_ms: float = Field()
# Resource utilization
gpu_utilization_percent: float = Field()
memory_utilization_percent: float = Field()
power_draw_w: float = Field()
temperature_celsius: float = Field()
# Edge-specific metrics
thermal_throttling_active: bool = Field(default=False)
power_limit_active: bool = Field(default=False)
clock_throttling_active: bool = Field(default=False)
# Geographic and network info
region: str = Field()
city: str | None = Field(default=None)
isp: str | None = Field(default=None)
connection_type: str | None = Field(default=None)
timestamp: datetime = Field(default_factory=datetime.now(datetime.UTC), index=True)
class GPUBooking(SQLModel, table=True):
"""Active and historical GPU bookings."""
__tablename__ = "gpu_bookings"
__table_args__ = {"extend_existing": True}
id: str = Field(default_factory=lambda: f"bk_{uuid4().hex[:10]}", primary_key=True)
gpu_id: str = Field(index=True)
client_id: str = Field(default="", index=True)
job_id: str | None = Field(default=None, index=True)
duration_hours: float = Field(default=0.0)
total_cost: float = Field(default=0.0)
status: str = Field(default="active", index=True) # active, completed, cancelled
start_time: datetime = Field(default_factory=datetime.now(datetime.UTC))
end_time: datetime | None = Field(default=None)
created_at: datetime = Field(default_factory=datetime.now(datetime.UTC), nullable=False)
class GPUReview(SQLModel, table=True):
"""Reviews for GPUs."""
__tablename__ = "gpu_reviews"
__table_args__ = {"extend_existing": True}
id: str = Field(default_factory=lambda: f"rv_{uuid4().hex[:10]}", primary_key=True)
gpu_id: str = Field(index=True)
user_id: str = Field(default="")
rating: int = Field(ge=1, le=5)
comment: str = Field(default="")
created_at: datetime = Field(default_factory=datetime.now(datetime.UTC), nullable=False, index=True)

View File

@@ -6,8 +6,9 @@ Manages GPU resource operations
from contextlib import asynccontextmanager
from typing import AsyncIterator
from fastapi import FastAPI
from fastapi import FastAPI, Depends
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from aitbc import (
configure_logging,
@@ -18,6 +19,9 @@ from aitbc import (
ErrorHandlerMiddleware,
)
from .storage import init_db, get_session
from .services.edge_gpu_service import EdgeGPUService
# Configure structured logging
configure_logging(level="INFO")
logger = get_logger(__name__)
@@ -27,6 +31,8 @@ logger = get_logger(__name__)
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
"""Lifecycle events for the GPU Service."""
logger.info("Starting GPU Service")
# Initialize database
await init_db()
yield
logger.info("Shutting down GPU Service")
@@ -67,6 +73,55 @@ async def gpu_status() -> dict[str, str]:
}
async def get_edge_service(session: AsyncSession = Depends(get_session)) -> EdgeGPUService:
"""Get edge GPU service instance"""
return EdgeGPUService(session)
@app.get("/v1/marketplace/edge-gpu/profiles")
async def get_consumer_gpu_profiles(
architecture: str | None = None,
edge_optimized: bool | None = None,
min_memory_gb: int | None = None,
svc: EdgeGPUService = Depends(get_edge_service),
):
"""Get consumer GPU profiles"""
from .domain.gpu_marketplace import GPUArchitecture
arch = GPUArchitecture(architecture) if architecture else None
return svc.list_profiles(architecture=arch, edge_optimized=edge_optimized, min_memory_gb=min_memory_gb)
@app.get("/v1/marketplace/edge-gpu/metrics/{gpu_id}")
async def get_edge_gpu_metrics(
gpu_id: str,
limit: int = 100,
svc: EdgeGPUService = Depends(get_edge_service),
):
"""Get edge GPU metrics"""
return svc.list_metrics(gpu_id=gpu_id, limit=limit)
@app.post("/v1/marketplace/edge-gpu/scan/{miner_id}")
async def scan_edge_gpus(
miner_id: str,
svc: EdgeGPUService = Depends(get_edge_service),
):
"""Scan and register edge GPUs for a miner"""
return await svc.discover_and_register_edge_gpus(miner_id)
@app.post("/v1/marketplace/edge-gpu/optimize/inference/{gpu_id}")
async def optimize_inference(
gpu_id: str,
model_name: str,
request_data: dict,
svc: EdgeGPUService = Depends(get_edge_service),
):
"""Optimize ML inference request for edge GPU"""
return await svc.optimize_inference_for_edge(gpu_id, model_name, request_data)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8101)

View File

@@ -0,0 +1,7 @@
"""
GPU Service services
"""
from .edge_gpu_service import EdgeGPUService
__all__ = ["EdgeGPUService"]

View File

@@ -0,0 +1,78 @@
"""
Edge GPU service for managing GPU operations
"""
from typing import Any
from sqlmodel import Session, select
from ..data.consumer_gpu_profiles import CONSUMER_GPU_PROFILES
from ..domain.gpu_marketplace import ConsumerGPUProfile, EdgeGPUMetrics, GPUArchitecture
class EdgeGPUService:
def __init__(self, session: Session):
self.session = session
def list_profiles(
self,
architecture: GPUArchitecture | None = None,
edge_optimized: bool | None = None,
min_memory_gb: int | None = None,
) -> list[ConsumerGPUProfile]:
self.seed_profiles()
stmt = select(ConsumerGPUProfile)
if architecture:
stmt = stmt.where(ConsumerGPUProfile.architecture == architecture)
if edge_optimized is not None:
stmt = stmt.where(ConsumerGPUProfile.edge_optimized == edge_optimized)
if min_memory_gb is not None:
stmt = stmt.where(ConsumerGPUProfile.memory_gb >= min_memory_gb)
return list(self.session.execute(stmt).all())
def list_metrics(self, gpu_id: str, limit: int = 100) -> list[EdgeGPUMetrics]:
stmt = (
select(EdgeGPUMetrics)
.where(EdgeGPUMetrics.gpu_id == gpu_id)
.order_by(EdgeGPUMetrics.timestamp.desc())
.limit(limit)
)
return list(self.session.execute(stmt).all())
def create_metric(self, payload: dict) -> EdgeGPUMetrics:
metric = EdgeGPUMetrics(**payload)
self.session.add(metric)
self.session.commit()
self.session.refresh(metric)
return metric
def seed_profiles(self) -> None:
existing_models = {row[0] for row in self.session.execute(select(ConsumerGPUProfile.gpu_model)).all()}
created = 0
for profile in CONSUMER_GPU_PROFILES.values():
if profile["gpu_model"] in existing_models:
continue
self.session.add(ConsumerGPUProfile(**profile))
created += 1
if created:
self.session.commit()
async def discover_and_register_edge_gpus(self, miner_id: str) -> dict[str, Any]:
"""Scan and register edge GPUs for a miner"""
# Placeholder for GPU discovery logic
return {
"miner_id": miner_id,
"gpus": [],
"registered": 0,
"edge_optimized": 0,
}
async def optimize_inference_for_edge(self, gpu_id: str, model_name: str, request_data: dict) -> dict[str, Any]:
"""Optimize ML inference request for edge GPU"""
# Placeholder for inference optimization logic
return {
"gpu_id": gpu_id,
"model_name": model_name,
"optimized": True,
"latency_reduction": 0.0,
}

View File

@@ -0,0 +1,42 @@
"""
Database session management for GPU service
"""
from contextlib import asynccontextmanager
from typing import AsyncIterator
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlmodel import SQLModel
from aitbc import get_logger
logger = get_logger(__name__)
# Database URL from environment variable or default
DATABASE_URL = "postgresql+asyncpg://aitbc_gpu:password@localhost:5432/aitbc_gpu"
# Create async engine
engine = create_async_engine(DATABASE_URL, echo=False)
async def init_db() -> None:
"""Initialize database tables"""
from .domain.gpu_marketplace import (
GPURegistry,
ConsumerGPUProfile,
EdgeGPUMetrics,
GPUBooking,
GPUReview,
)
async with engine.begin() as conn:
await conn.run_sync(SQLModel.metadata.create_all)
logger.info("GPU service database initialized")
@asynccontextmanager
async def get_session() -> AsyncIterator[AsyncSession]:
"""Get database session"""
async with AsyncSession(engine) as session:
yield session