feat: add foreign key constraints and metrics for blockchain node

This commit is contained in:
oib
2025-09-28 06:04:30 +02:00
parent fe29631a86
commit b8b640666d
188 changed files with 15678 additions and 158 deletions

View File

@@ -0,0 +1,5 @@
"""FastAPI application wiring for the AITBC Pool Hub."""
from .main import create_app, app
__all__ = ["create_app", "app"]

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from typing import AsyncGenerator
from fastapi import Depends
from ..database import get_session
from ..redis_cache import get_redis
def get_db_session() -> AsyncGenerator:
    """Expose the shared database session generator from ``..database``."""
    session_generator = get_session()
    return session_generator


def get_redis_client() -> AsyncGenerator:
    """Expose the shared Redis client generator from ``..redis_cache``."""
    client_generator = get_redis()
    return client_generator
# FastAPI dependency wrappers
async def db_session_dep(session=Depends(get_session)):
    """Yield each item produced by the ``get_session`` dependency.

    NOTE(review): if ``get_session`` is itself a generator dependency,
    FastAPI would already resolve it to the yielded session, making the
    ``async for`` here fail — confirm ``get_session`` returns an async
    iterator rather than being consumed by ``Depends``.
    """
    async for item in session:
        yield item


async def redis_dep(client=Depends(get_redis)):
    """Yield each item produced by the ``get_redis`` dependency."""
    async for item in client:
        yield item

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
from contextlib import asynccontextmanager
from fastapi import FastAPI
from ..database import close_engine, create_engine
from ..redis_cache import close_redis, create_redis
from ..settings import settings
from .routers import health_router, match_router, metrics_router
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Create the database engine and Redis client at startup, close both at shutdown.

    Passed to ``FastAPI(lifespan=...)`` below; teardown runs in ``finally``
    so resources are released even if the application exits abnormally.
    """
    # Note: create_engine()/create_redis() are called synchronously here,
    # while the close_* counterparts are awaited.
    create_engine()
    create_redis()
    try:
        yield
    finally:
        await close_engine()
        await close_redis()
# Application instance is created at import time; ``lifespan`` (above)
# manages engine/redis setup and teardown.
app = FastAPI(**settings.asgi_kwargs(), lifespan=lifespan)
# Match endpoints are versioned under /v1; health/metrics keep their own prefixes.
app.include_router(match_router, prefix="/v1")
app.include_router(health_router)
app.include_router(metrics_router)
def create_app() -> FastAPI:
    """Return the module-level application (factory-style accessor for ASGI servers)."""
    return app

View File

@@ -0,0 +1,39 @@
from __future__ import annotations
from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest
# Metrics for the /match pipeline and health checks. All are registered in
# prometheus_client's default registry at import time.
match_requests_total = Counter(
    "poolhub_match_requests_total",
    "Total number of match requests received",
)
match_candidates_returned = Counter(
    "poolhub_match_candidates_total",
    "Total number of candidates returned",
)
match_failures_total = Counter(
    "poolhub_match_failures_total",
    "Total number of match request failures",
)
# Buckets span 10ms to 5s, tuned for request-scoped match processing.
match_latency_seconds = Histogram(
    "poolhub_match_latency_seconds",
    "Latency of match processing",
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0),
)
miners_online_gauge = Gauge(
    "poolhub_miners_online",
    "Number of miners considered online",
)
def render_metrics() -> tuple[bytes, str]:
    """Return the Prometheus exposition payload and its content type.

    ``prometheus_client.generate_latest`` returns ``bytes`` (the encoded
    text exposition format), so the payload element is ``bytes`` — the
    previous ``tuple[str, str]`` annotation was incorrect. The second
    element is the ``CONTENT_TYPE_LATEST`` media-type string.
    """
    return generate_latest(), CONTENT_TYPE_LATEST
def reset_metrics() -> None:
    """Zero all module metrics by writing to prometheus_client internals.

    NOTE(review): this relies on private attributes (``_value``, ``_sum``,
    ``_count``, ``_samples``) that are not part of the public
    prometheus_client API and may break across library versions.
    Presumably a test-only helper — confirm before calling in production.
    """
    match_requests_total._value.set(0)  # type: ignore[attr-defined]
    match_candidates_returned._value.set(0)  # type: ignore[attr-defined]
    match_failures_total._value.set(0)  # type: ignore[attr-defined]
    match_latency_seconds._sum.set(0)  # type: ignore[attr-defined]
    match_latency_seconds._count.set(0)  # type: ignore[attr-defined]
    # Clearing _samples discards recorded bucket observations.
    match_latency_seconds._samples = []  # type: ignore[attr-defined]
    miners_online_gauge._value.set(0)  # type: ignore[attr-defined]

View File

@@ -0,0 +1,7 @@
"""FastAPI routers for Pool Hub."""
from .match import router as match_router
from .health import router as health_router
from .metrics import router as metrics_router
__all__ = ["match_router", "health_router", "metrics_router"]

View File

@@ -0,0 +1,50 @@
from __future__ import annotations

from fastapi import APIRouter, Depends
from redis.asyncio import Redis
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

from poolhub.repositories.miner_repository import MinerRepository

from ..deps import db_session_dep, redis_dep
from ..prometheus import miners_online_gauge
from ..schemas import HealthResponse
router = APIRouter(tags=["health"], prefix="/v1")


@router.get("/health", response_model=HealthResponse, summary="Pool Hub health status")
async def health_endpoint(
    session: AsyncSession = Depends(db_session_dep),
    redis: Redis = Depends(redis_dep),
) -> HealthResponse:
    """Report database/Redis reachability and the number of active miners.

    Returns status "ok" only when both backends respond; otherwise
    "degraded" with the captured error strings. Never raises — a health
    probe should report failure, not fail itself.
    """
    db_ok = True
    redis_ok = True
    db_error: str | None = None
    redis_error: str | None = None
    try:
        # SQLAlchemy 2.x rejects plain strings in execute(); wrap in text().
        await session.execute(text("SELECT 1"))
    except Exception as exc:  # pragma: no cover
        db_ok = False
        db_error = str(exc)
    try:
        await redis.ping()
    except Exception as exc:  # pragma: no cover
        redis_ok = False
        redis_error = str(exc)
    # Guard the miner lookup: if either backend is down this would otherwise
    # raise and turn the health endpoint into a 500.
    miners_online = 0
    try:
        miner_repo = MinerRepository(session, redis)
        active_miners = await miner_repo.list_active_miners()
        miners_online = len(active_miners)
    except Exception:  # pragma: no cover - degrade, don't crash the probe
        pass
    miners_online_gauge.set(miners_online)
    status = "ok" if db_ok and redis_ok else "degraded"
    return HealthResponse(
        status=status,
        db=db_ok,
        redis=redis_ok,
        miners_online=miners_online,
        db_error=db_error,
        redis_error=redis_error,
    )

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import time
from typing import Any, Dict, List
from fastapi import APIRouter, Depends, HTTPException, status
from redis.asyncio import Redis
from sqlalchemy.ext.asyncio import AsyncSession
from ..deps import db_session_dep, redis_dep
from ..prometheus import (
match_candidates_returned,
match_failures_total,
match_latency_seconds,
match_requests_total,
)
from poolhub.repositories.match_repository import MatchRepository
from poolhub.repositories.miner_repository import MinerRepository
from ..schemas import MatchCandidate, MatchRequestPayload, MatchResponse
# Mounted with prefix="/v1" in main.py, so the route below serves /v1/match.
router = APIRouter(tags=["match"])
def _normalize_requirements(requirements: Dict[str, Any]) -> Dict[str, Any]:
return requirements or {}
def _candidate_from_payload(payload: Dict[str, Any]) -> MatchCandidate:
    """Hydrate a :class:`MatchCandidate` model from a plain candidate dict."""
    candidate = MatchCandidate(**payload)
    return candidate
@router.post("/match", response_model=MatchResponse, summary="Find top miners for a job")
async def match_endpoint(
    payload: MatchRequestPayload,
    session: AsyncSession = Depends(db_session_dep),
    redis: Redis = Depends(redis_dep),
) -> MatchResponse:
    """Persist a match request, rank active miners, and return the top candidates.

    Increments the request counter up-front, counts returned candidates,
    and records processing latency; any failure increments the failure
    counter and surfaces as HTTP 500 with detail "match_failed".
    """
    start = time.perf_counter()
    match_requests_total.inc()
    miner_repo = MinerRepository(session, redis)
    match_repo = MatchRepository(session, redis)
    requirements = _normalize_requirements(payload.requirements)
    top_k = payload.top_k
    try:
        # Persist the request first so results can be linked to request.id.
        request = await match_repo.create_request(
            job_id=payload.job_id,
            requirements=requirements,
            hints=payload.hints,
            top_k=top_k,
        )
        active_miners = await miner_repo.list_active_miners()
        candidates = _select_candidates(requirements, payload.hints, active_miners, top_k)
        await match_repo.add_results(
            request_id=request.id,
            candidates=candidates,
        )
        match_candidates_returned.inc(len(candidates))
        # NOTE(review): latency is observed only on the success path;
        # failed requests are counted but not timed — confirm intentional.
        duration = time.perf_counter() - start
        match_latency_seconds.observe(duration)
        return MatchResponse(
            job_id=payload.job_id,
            candidates=[_candidate_from_payload(candidate) for candidate in candidates],
        )
    except Exception as exc:  # pragma: no cover - safeguards unexpected failures
        match_failures_total.inc()
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="match_failed") from exc
def _select_candidates(
requirements: Dict[str, Any],
hints: Dict[str, Any],
active_miners: List[tuple],
top_k: int,
) -> List[Dict[str, Any]]:
min_vram = float(requirements.get("min_vram_gb", 0))
min_ram = float(requirements.get("min_ram_gb", 0))
capabilities_required = set(requirements.get("capabilities_any", []))
region_hint = hints.get("region")
ranked: List[Dict[str, Any]] = []
for miner, status, score in active_miners:
if miner.gpu_vram_gb and miner.gpu_vram_gb < min_vram:
continue
if miner.ram_gb and miner.ram_gb < min_ram:
continue
if capabilities_required and not capabilities_required.issubset(set(miner.capabilities or [])):
continue
if region_hint and miner.region and miner.region != region_hint:
continue
candidate = {
"miner_id": miner.miner_id,
"addr": miner.addr,
"proto": miner.proto,
"score": float(score),
"explain": _compose_explain(score, miner, status),
"eta_ms": status.avg_latency_ms if status else None,
"price": miner.base_price,
}
ranked.append(candidate)
ranked.sort(key=lambda item: item["score"], reverse=True)
return ranked[:top_k]
def _compose_explain(score: float, miner, status) -> str:
load = status.queue_len if status else 0
latency = status.avg_latency_ms if status else "n/a"
return f"score={score:.3f} load={load} latency={latency}"

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from fastapi import APIRouter, Response
from ..prometheus import render_metrics
router = APIRouter(tags=["metrics"])


@router.get("/metrics", summary="Prometheus metrics")
async def metrics_endpoint() -> Response:
    """Serve the Prometheus metrics payload in the text exposition format."""
    body, content_type = render_metrics()
    return Response(content=body, media_type=content_type)

View File

@@ -0,0 +1,40 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class MatchRequestPayload(BaseModel):
    """Inbound request body for POST /v1/match."""

    job_id: str
    requirements: Dict[str, Any] = Field(default_factory=dict)  # e.g. min_vram_gb, min_ram_gb, capabilities_any
    hints: Dict[str, Any] = Field(default_factory=dict)  # e.g. region
    top_k: int = Field(default=1, ge=1, le=50)  # maximum candidates to return
class MatchCandidate(BaseModel):
    """A ranked miner returned by the matching pipeline."""

    miner_id: str
    addr: str  # network address the client should contact
    proto: str  # transport protocol identifier
    score: float  # ranking score; results are sorted by it, highest first
    explain: Optional[str] = None  # human-readable ranking explanation
    eta_ms: Optional[int] = None  # taken from status.avg_latency_ms when available
    price: Optional[float] = None  # miner.base_price
class MatchResponse(BaseModel):
    """Response body for POST /v1/match: the top-k candidates for a job."""

    job_id: str
    candidates: List[MatchCandidate]  # sorted by score, highest first
class HealthResponse(BaseModel):
    """Response body for GET /v1/health."""

    status: str  # "ok" when both backends respond, otherwise "degraded"
    db: bool  # database reachability
    redis: bool  # Redis reachability
    miners_online: int  # count of active miners
    db_error: Optional[str] = None  # stringified exception when the DB check fails
    redis_error: Optional[str] = None  # stringified exception when the Redis check fails
class MetricsResponse(BaseModel):
    """Schema describing the metrics endpoint output.

    NOTE(review): not referenced by the visible routers (the /metrics route
    returns a raw Response) — confirm whether this model is still needed.
    """

    detail: str = "Prometheus metrics output"