feat: add foreign key constraints and metrics for blockchain node
This commit is contained in:
5
apps/pool-hub/src/poolhub/app/__init__.py
Normal file
5
apps/pool-hub/src/poolhub/app/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""FastAPI application wiring for the AITBC Pool Hub."""
|
||||
|
||||
from .main import create_app, app
|
||||
|
||||
__all__ = ["create_app", "app"]
|
||||
27
apps/pool-hub/src/poolhub/app/deps.py
Normal file
27
apps/pool-hub/src/poolhub/app/deps.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from fastapi import Depends
|
||||
|
||||
from ..database import get_session
|
||||
from ..redis_cache import get_redis
|
||||
|
||||
|
||||
def get_db_session() -> AsyncGenerator:
    """Return the project's async DB session generator.

    Thin indirection over ``poolhub.database.get_session`` so the app
    layer has a single import point for session acquisition.
    """
    return get_session()
|
||||
|
||||
|
||||
def get_redis_client() -> AsyncGenerator:
    """Return the project's Redis client generator.

    Mirrors :func:`get_db_session` for ``poolhub.redis_cache.get_redis``.
    """
    return get_redis()
|
||||
|
||||
|
||||
# FastAPI dependency wrappers
async def db_session_dep(session=Depends(get_session)):
    """Yield a request-scoped database session for route injection.

    NOTE(review): this assumes ``get_session`` is a plain callable that
    *returns* an async generator. If ``get_session`` is itself an async
    generator function, FastAPI already resolves ``Depends(get_session)``
    to the yielded session, and the ``async for`` below would fail —
    confirm against ``poolhub.database``.
    """
    async for s in session:
        yield s
|
||||
|
||||
|
||||
async def redis_dep(client=Depends(get_redis)):
    """Yield a request-scoped Redis client for route injection.

    NOTE(review): same caveat as ``db_session_dep`` — correct only if
    ``get_redis`` returns (rather than is) an async generator.
    """
    async for c in client:
        yield c
|
||||
31
apps/pool-hub/src/poolhub/app/main.py
Normal file
31
apps/pool-hub/src/poolhub/app/main.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from ..database import close_engine, create_engine
|
||||
from ..redis_cache import close_redis, create_redis
|
||||
from ..settings import settings
|
||||
from .routers import health_router, match_router, metrics_router
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan: create shared resources on startup, release on shutdown.

    Builds the database engine and Redis client before the app serves
    traffic; the ``finally`` block guarantees both are closed even if the
    application body raises while running.
    """
    create_engine()
    create_redis()
    try:
        yield
    finally:
        await close_engine()
        await close_redis()
|
||||
|
||||
|
||||
# Module-level ASGI application. Settings supply the FastAPI constructor
# kwargs (title, docs, etc.); the lifespan handler above owns resource
# setup and teardown.
app = FastAPI(**settings.asgi_kwargs(), lifespan=lifespan)
app.include_router(match_router, prefix="/v1")  # POST /v1/match
app.include_router(health_router)  # health router carries its own /v1 prefix
app.include_router(metrics_router)  # GET /metrics (unversioned, for scrapers)


def create_app() -> FastAPI:
    """Return the application instance.

    NOTE(review): despite the factory-style name, this returns the shared
    module-level singleton above rather than building a fresh app per
    call, so it does not provide per-test isolation.
    """
    return app
|
||||
39
apps/pool-hub/src/poolhub/app/prometheus.py
Normal file
39
apps/pool-hub/src/poolhub/app/prometheus.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest
|
||||
|
||||
# Counter: incremented once per incoming match request.
match_requests_total = Counter(
    "poolhub_match_requests_total",
    "Total number of match requests received",
)
# Counter: incremented by the number of candidates in each response.
match_candidates_returned = Counter(
    "poolhub_match_candidates_total",
    "Total number of candidates returned",
)
# Counter: incremented once per failed match request.
match_failures_total = Counter(
    "poolhub_match_failures_total",
    "Total number of match request failures",
)
# Histogram of end-to-end match processing time in seconds; buckets span
# 10ms to 5s to cover both cache hits and slow DB paths.
match_latency_seconds = Histogram(
    "poolhub_match_latency_seconds",
    "Latency of match processing",
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0),
)
# Gauge: last observed count of online miners (set by the health endpoint).
miners_online_gauge = Gauge(
    "poolhub_miners_online",
    "Number of miners considered online",
)
|
||||
|
||||
|
||||
def render_metrics() -> tuple[bytes, str]:
    """Serialize the default registry for a /metrics response.

    Returns the exposition payload and its content type. Note that
    ``generate_latest`` returns ``bytes`` (not ``str``), hence the
    return annotation.
    """
    return generate_latest(), CONTENT_TYPE_LATEST
|
||||
|
||||
|
||||
def reset_metrics() -> None:
    """Zero all module metrics (intended as a test helper).

    Reaches into ``prometheus_client`` private attributes because the
    library exposes no public reset; every line here is coupled to the
    library's internals and may break across versions.
    """
    match_requests_total._value.set(0)  # type: ignore[attr-defined]
    match_candidates_returned._value.set(0)  # type: ignore[attr-defined]
    match_failures_total._value.set(0)  # type: ignore[attr-defined]
    match_latency_seconds._sum.set(0)  # type: ignore[attr-defined]
    # NOTE(review): current prometheus_client Histograms track counts via
    # their buckets (``_buckets``), not a ``_count`` attribute, and have no
    # ``_samples`` attribute to clear — the next two lines likely raise
    # AttributeError or shadow a method without resetting bucket values.
    # Verify against the installed prometheus_client version.
    match_latency_seconds._count.set(0)  # type: ignore[attr-defined]
    match_latency_seconds._samples = []  # type: ignore[attr-defined]
    miners_online_gauge._value.set(0)  # type: ignore[attr-defined]
|
||||
7
apps/pool-hub/src/poolhub/app/routers/__init__.py
Normal file
7
apps/pool-hub/src/poolhub/app/routers/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""FastAPI routers for Pool Hub."""
|
||||
|
||||
from .match import router as match_router
|
||||
from .health import router as health_router
|
||||
from .metrics import router as metrics_router
|
||||
|
||||
__all__ = ["match_router", "health_router", "metrics_router"]
|
||||
50
apps/pool-hub/src/poolhub/app/routers/health.py
Normal file
50
apps/pool-hub/src/poolhub/app/routers/health.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from redis.asyncio import Redis
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ..deps import db_session_dep, redis_dep
|
||||
from ..prometheus import miners_online_gauge
|
||||
from poolhub.repositories.miner_repository import MinerRepository
|
||||
from ..schemas import HealthResponse
|
||||
|
||||
router = APIRouter(tags=["health"], prefix="/v1")
|
||||
|
||||
|
||||
@router.get("/health", response_model=HealthResponse, summary="Pool Hub health status")
async def health_endpoint(
    session: AsyncSession = Depends(db_session_dep),
    redis: Redis = Depends(redis_dep),
) -> HealthResponse:
    """Report liveness of the database and Redis plus the online-miner count.

    Each probe is isolated so a broken backend degrades the report instead
    of turning the health check itself into a 500.
    """
    # Local import: SQLAlchemy 2.x rejects raw SQL strings in execute(),
    # so the probe must be wrapped in text(). (The original raw-string
    # probe always raised, falsely reporting the DB as down.)
    from sqlalchemy import text

    db_ok = True
    redis_ok = True
    db_error: str | None = None
    redis_error: str | None = None

    try:
        await session.execute(text("SELECT 1"))
    except Exception as exc:  # pragma: no cover
        db_ok = False
        db_error = str(exc)

    try:
        await redis.ping()
    except Exception as exc:  # pragma: no cover
        redis_ok = False
        redis_error = str(exc)

    # Miner count is best-effort: previously an unhandled repository error
    # (e.g. DB down) crashed the endpoint with a 500 exactly when callers
    # most need a degraded-but-valid health report. Report 0 instead.
    miners_online = 0
    try:
        miner_repo = MinerRepository(session, redis)
        active_miners = await miner_repo.list_active_miners()
        miners_online = len(active_miners)
    except Exception:  # pragma: no cover - deliberate best-effort
        pass
    miners_online_gauge.set(miners_online)

    status = "ok" if db_ok and redis_ok else "degraded"
    return HealthResponse(
        status=status,
        db=db_ok,
        redis=redis_ok,
        miners_online=miners_online,
        db_error=db_error,
        redis_error=redis_error,
    )
|
||||
116
apps/pool-hub/src/poolhub/app/routers/match.py
Normal file
116
apps/pool-hub/src/poolhub/app/routers/match.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from redis.asyncio import Redis
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from ..deps import db_session_dep, redis_dep
|
||||
from ..prometheus import (
|
||||
match_candidates_returned,
|
||||
match_failures_total,
|
||||
match_latency_seconds,
|
||||
match_requests_total,
|
||||
)
|
||||
from poolhub.repositories.match_repository import MatchRepository
|
||||
from poolhub.repositories.miner_repository import MinerRepository
|
||||
from ..schemas import MatchCandidate, MatchRequestPayload, MatchResponse
|
||||
|
||||
router = APIRouter(tags=["match"])
|
||||
|
||||
|
||||
def _normalize_requirements(requirements: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return requirements or {}
|
||||
|
||||
|
||||
def _candidate_from_payload(payload: Dict[str, Any]) -> MatchCandidate:
    """Validate a raw candidate dict into the response schema model."""
    return MatchCandidate(**payload)
|
||||
|
||||
|
||||
@router.post("/match", response_model=MatchResponse, summary="Find top miners for a job")
async def match_endpoint(
    payload: MatchRequestPayload,
    session: AsyncSession = Depends(db_session_dep),
    redis: Redis = Depends(redis_dep),
) -> MatchResponse:
    """Persist a match request, rank active miners, and return the top-k.

    Flow: record the request, load active miners, filter/rank them against
    the payload's requirements and hints, persist the results, then build
    the response. Any failure inside the try is counted and surfaced as a
    generic 500 with detail ``match_failed``.
    """
    start = time.perf_counter()
    match_requests_total.inc()

    miner_repo = MinerRepository(session, redis)
    match_repo = MatchRepository(session, redis)

    requirements = _normalize_requirements(payload.requirements)
    top_k = payload.top_k

    try:
        # Persist the request first so results can reference its id.
        request = await match_repo.create_request(
            job_id=payload.job_id,
            requirements=requirements,
            hints=payload.hints,
            top_k=top_k,
        )

        active_miners = await miner_repo.list_active_miners()
        candidates = _select_candidates(requirements, payload.hints, active_miners, top_k)

        await match_repo.add_results(
            request_id=request.id,
            candidates=candidates,
        )

        match_candidates_returned.inc(len(candidates))
        # Latency is only observed on the success path; failures are
        # tracked solely by the failure counter below.
        duration = time.perf_counter() - start
        match_latency_seconds.observe(duration)

        return MatchResponse(
            job_id=payload.job_id,
            candidates=[_candidate_from_payload(candidate) for candidate in candidates],
        )
    except Exception as exc:  # pragma: no cover - safeguards unexpected failures
        match_failures_total.inc()
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="match_failed") from exc
|
||||
|
||||
|
||||
def _select_candidates(
|
||||
requirements: Dict[str, Any],
|
||||
hints: Dict[str, Any],
|
||||
active_miners: List[tuple],
|
||||
top_k: int,
|
||||
) -> List[Dict[str, Any]]:
|
||||
min_vram = float(requirements.get("min_vram_gb", 0))
|
||||
min_ram = float(requirements.get("min_ram_gb", 0))
|
||||
capabilities_required = set(requirements.get("capabilities_any", []))
|
||||
region_hint = hints.get("region")
|
||||
|
||||
ranked: List[Dict[str, Any]] = []
|
||||
for miner, status, score in active_miners:
|
||||
if miner.gpu_vram_gb and miner.gpu_vram_gb < min_vram:
|
||||
continue
|
||||
if miner.ram_gb and miner.ram_gb < min_ram:
|
||||
continue
|
||||
if capabilities_required and not capabilities_required.issubset(set(miner.capabilities or [])):
|
||||
continue
|
||||
if region_hint and miner.region and miner.region != region_hint:
|
||||
continue
|
||||
|
||||
candidate = {
|
||||
"miner_id": miner.miner_id,
|
||||
"addr": miner.addr,
|
||||
"proto": miner.proto,
|
||||
"score": float(score),
|
||||
"explain": _compose_explain(score, miner, status),
|
||||
"eta_ms": status.avg_latency_ms if status else None,
|
||||
"price": miner.base_price,
|
||||
}
|
||||
ranked.append(candidate)
|
||||
|
||||
ranked.sort(key=lambda item: item["score"], reverse=True)
|
||||
return ranked[:top_k]
|
||||
|
||||
|
||||
def _compose_explain(score: float, miner, status) -> str:
|
||||
load = status.queue_len if status else 0
|
||||
latency = status.avg_latency_ms if status else "n/a"
|
||||
return f"score={score:.3f} load={load} latency={latency}"
|
||||
13
apps/pool-hub/src/poolhub/app/routers/metrics.py
Normal file
13
apps/pool-hub/src/poolhub/app/routers/metrics.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Response
|
||||
|
||||
from ..prometheus import render_metrics
|
||||
|
||||
router = APIRouter(tags=["metrics"])
|
||||
|
||||
|
||||
@router.get("/metrics", summary="Prometheus metrics")
async def metrics_endpoint() -> Response:
    """Expose Prometheus metrics in the text exposition format.

    Delegates serialization (and the correct content type) to
    :func:`render_metrics`.
    """
    payload, content_type = render_metrics()
    return Response(content=payload, media_type=content_type)
|
||||
40
apps/pool-hub/src/poolhub/app/schemas.py
Normal file
40
apps/pool-hub/src/poolhub/app/schemas.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class MatchRequestPayload(BaseModel):
    """Request body for POST /v1/match."""

    job_id: str
    # Hard filters read by the matcher, e.g. min_vram_gb, min_ram_gb,
    # capabilities_any.
    requirements: Dict[str, Any] = Field(default_factory=dict)
    # Soft preferences, e.g. region.
    hints: Dict[str, Any] = Field(default_factory=dict)
    # Maximum number of candidates to return, bounded to 1..50.
    top_k: int = Field(default=1, ge=1, le=50)
|
||||
|
||||
|
||||
class MatchCandidate(BaseModel):
    """A single ranked miner returned by the matcher."""

    miner_id: str
    addr: str  # miner endpoint address
    proto: str  # transport protocol identifier
    score: float  # ranking score; higher is better
    explain: Optional[str] = None  # human-readable score breakdown
    eta_ms: Optional[int] = None  # average latency when status is known
    price: Optional[float] = None  # miner base price when set
|
||||
|
||||
|
||||
class MatchResponse(BaseModel):
    """Response body for POST /v1/match: candidates ordered by descending score."""

    job_id: str
    candidates: List[MatchCandidate]
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Response body for GET /v1/health."""

    status: str  # "ok" when both backends respond, otherwise "degraded"
    db: bool
    redis: bool
    miners_online: int
    db_error: Optional[str] = None  # probe error text when db is False
    redis_error: Optional[str] = None  # probe error text when redis is False
|
||||
|
||||
|
||||
class MetricsResponse(BaseModel):
    """Documentation-only schema; the /metrics route returns plain text.

    NOTE(review): not referenced by any router in this change — confirm it
    is used elsewhere (e.g. OpenAPI docs) or remove it.
    """

    detail: str = "Prometheus metrics output"
|
||||
Reference in New Issue
Block a user