chore: initialize monorepo with project scaffolding, configs, and CI setup

34 apps/coordinator-api/README.md Normal file
@@ -0,0 +1,34 @@
# Coordinator API

## Purpose & Scope

FastAPI service that accepts client compute jobs, matches miners, and tracks job lifecycle for the AITBC network.
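
For illustration, a job submission round-trip against a locally running instance could look like the sketch below. The paths and the `X-Api-Key` header come from this service's routers; the key values are the development defaults from `app/config.py`, and the port assumes uvicorn's default of 8000.

```bash
# Submit a job with a client key (dev default), then poll its status.
curl -s -X POST http://127.0.0.1:8000/v1/jobs \
  -H 'X-Api-Key: client_dev_key_1' \
  -H 'Content-Type: application/json' \
  -d '{"payload": {"task": "demo"}, "ttl_seconds": 900}'

# Use the returned job_id to check progress and fetch the result.
curl -s http://127.0.0.1:8000/v1/jobs/<job_id> -H 'X-Api-Key: client_dev_key_1'
curl -s http://127.0.0.1:8000/v1/jobs/<job_id>/result -H 'X-Api-Key: client_dev_key_1'
```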

## Development Setup

1. Create a virtual environment in `apps/coordinator-api/.venv`.
2. Install the dependencies listed in `pyproject.toml`.
3. Run the FastAPI app via `uvicorn app.main:app --reload`.
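
A minimal sketch of those three steps, assuming Poetry is available (the project metadata in `pyproject.toml` uses `[tool.poetry]`):

```bash
cd apps/coordinator-api
python3.11 -m venv .venv && source .venv/bin/activate
pip install poetry
poetry config virtualenvs.create false --local   # install into the active .venv
poetry install
uvicorn app.main:app --reload
```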

## Configuration

Expects environment variables defined in `.env` (see `docs/bootstrap/coordinator_api.md`).
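
As a starting point, a development `.env` might look like the following. The variable names mirror the fields of `Settings` in `app/config.py` (pydantic-settings is case-insensitive here) and the values shown are the code defaults, so treat this as an illustrative sketch rather than a canonical file.

```bash
# apps/coordinator-api/.env — development values; see app/config.py for the full list
APP_ENV=dev
APP_HOST=127.0.0.1
APP_PORT=8011
DATABASE_URL=sqlite:///./coordinator.db
CLIENT_API_KEYS=["client_dev_key_1"]
MINER_API_KEYS=["miner_dev_key_1"]
ADMIN_API_KEYS=["admin_dev_key_1"]
JOB_TTL_SECONDS=900
# RECEIPT_SIGNING_KEY_HEX=<hex-encoded Ed25519 private key>   # optional, see below
```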

### Signed receipts (optional)

- Generate an Ed25519 key:

  ```bash
  python - <<'PY'
  from nacl.signing import SigningKey
  sk = SigningKey.generate()
  print(sk.encode().hex())
  PY
  ```

- Set `RECEIPT_SIGNING_KEY_HEX` in the `.env` file to the printed hex string to enable signed receipts, which are returned by `/v1/miners/{job_id}/result` and retrievable via `/v1/jobs/{job_id}/receipt`.
- Receipt history is available at `/v1/jobs/{job_id}/receipts` (requires a client API key) and returns all stored signed payloads.
- To enable coordinator attestations, set `RECEIPT_ATTESTATION_KEY_HEX` to a separate Ed25519 private key; responses then include an `attestations` array alongside the miner signature.
- Clients can verify `signature` objects using the `aitbc_crypto` package (see `protocols/receipts/spec.md`).
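
A sketch of that verification, mirroring the `python - <<'PY'` pattern above. It assumes the `aitbc_crypto` package (vendored at `packages/py/aitbc-crypto`) is installed, uses its `ReceiptVerifier` class as exercised in `tests/test_miner_service.py`, and treats the file name and key value as placeholders:

```bash
python - <<'PY'
import json
from aitbc_crypto.signing import ReceiptVerifier

# receipt.json: the payload returned by GET /v1/jobs/{job_id}/receipt (placeholder file name)
receipt = json.load(open("receipt.json"))

# The signature covers the receipt minus the signature/attestations fields.
payload = {k: v for k, v in receipt.items() if k not in {"signature", "attestations"}}

MINER_VERIFY_KEY_HEX = "<hex-encoded miner verify key>"  # placeholder
verifier = ReceiptVerifier(bytes.fromhex(MINER_VERIFY_KEY_HEX))
print("signature valid:", verifier.verify(payload, receipt["signature"]))
PY
```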

## Systemd

Service name: `aitbc-coordinator-api` (to be defined under `configs/systemd/`).

33 apps/coordinator-api/pyproject.toml Normal file
@@ -0,0 +1,33 @@
[tool.poetry]
name = "aitbc-coordinator-api"
version = "0.1.0"
description = "AITBC Coordinator API service"
authors = ["AITBC Team"]
packages = [
    { include = "app", from = "src" }
]

[tool.poetry.dependencies]
python = "^3.11"
fastapi = "^0.111.0"
uvicorn = { extras = ["standard"], version = "^0.30.0" }
pydantic = "^2.7.0"
pydantic-settings = "^2.2.1"
sqlalchemy = "^2.0.30"
aiosqlite = "^0.20.0"
sqlmodel = "^0.0.16"
httpx = "^0.27.0"
python-dotenv = "^1.0.1"
slowapi = "^0.1.8"
orjson = "^3.10.0"
gunicorn = "^22.0.0"
aitbc-crypto = { path = "../../packages/py/aitbc-crypto" }

[tool.poetry.group.dev.dependencies]
pytest = "^8.2.0"
pytest-asyncio = "^0.23.0"
httpx = { extras = ["cli"], version = "^0.27.0" }

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

1 apps/coordinator-api/src/app/__init__.py Normal file
@@ -0,0 +1 @@
"""AITBC Coordinator API package."""

32 apps/coordinator-api/src/app/config.py Normal file
@@ -0,0 +1,32 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import List, Optional


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", case_sensitive=False)

    app_env: str = "dev"
    app_host: str = "127.0.0.1"
    app_port: int = 8011

    database_url: str = "sqlite:///./coordinator.db"

    client_api_keys: List[str] = ["client_dev_key_1"]
    miner_api_keys: List[str] = ["miner_dev_key_1"]
    admin_api_keys: List[str] = ["admin_dev_key_1"]

    hmac_secret: Optional[str] = None
    allow_origins: List[str] = ["*"]

    job_ttl_seconds: int = 900
    heartbeat_interval_seconds: int = 10
    heartbeat_timeout_seconds: int = 30

    rate_limit_requests: int = 60
    rate_limit_window_seconds: int = 60

    receipt_signing_key_hex: Optional[str] = None
    receipt_attestation_key_hex: Optional[str] = None


settings = Settings()

26 apps/coordinator-api/src/app/deps.py Normal file
@@ -0,0 +1,26 @@
from typing import Callable
from fastapi import Header, HTTPException

from .config import settings


class APIKeyValidator:
    def __init__(self, allowed_keys: list[str]):
        self.allowed_keys = {key.strip() for key in allowed_keys if key}

    def __call__(self, api_key: str | None = Header(default=None, alias="X-Api-Key")) -> str:
        if not api_key or api_key not in self.allowed_keys:
            raise HTTPException(status_code=401, detail="invalid api key")
        return api_key


def require_client_key() -> Callable[[str | None], str]:
    return APIKeyValidator(settings.client_api_keys)


def require_miner_key() -> Callable[[str | None], str]:
    return APIKeyValidator(settings.miner_api_keys)


def require_admin_key() -> Callable[[str | None], str]:
    return APIKeyValidator(settings.admin_api_keys)

7 apps/coordinator-api/src/app/domain/__init__.py Normal file
@@ -0,0 +1,7 @@
"""Domain models for the coordinator API."""

from .job import Job
from .miner import Miner
from .job_receipt import JobReceipt

__all__ = ["Job", "Miner", "JobReceipt"]

30 apps/coordinator-api/src/app/domain/job.py Normal file
@@ -0,0 +1,30 @@
from __future__ import annotations

from datetime import datetime
from typing import Optional
from uuid import uuid4

from sqlalchemy import Column, JSON
from sqlmodel import Field, SQLModel

from ..models import JobState


class Job(SQLModel, table=True):
    id: str = Field(default_factory=lambda: uuid4().hex, primary_key=True, index=True)
    client_id: str = Field(index=True)

    state: JobState = Field(default=JobState.queued, sa_column_kwargs={"nullable": False})
    payload: dict = Field(sa_column=Column(JSON, nullable=False))
    constraints: dict = Field(default_factory=dict, sa_column=Column(JSON, nullable=False))

    ttl_seconds: int = Field(default=900)
    requested_at: datetime = Field(default_factory=datetime.utcnow)
    expires_at: datetime = Field(default_factory=datetime.utcnow)

    assigned_miner_id: Optional[str] = Field(default=None, index=True)

    result: Optional[dict] = Field(default=None, sa_column=Column(JSON, nullable=True))
    receipt: Optional[dict] = Field(default=None, sa_column=Column(JSON, nullable=True))
    receipt_id: Optional[str] = Field(default=None, index=True)
    error: Optional[str] = None

15 apps/coordinator-api/src/app/domain/job_receipt.py Normal file
@@ -0,0 +1,15 @@
from __future__ import annotations

from datetime import datetime
from uuid import uuid4

from sqlalchemy import Column, JSON
from sqlmodel import Field, SQLModel


class JobReceipt(SQLModel, table=True):
    id: str = Field(default_factory=lambda: uuid4().hex, primary_key=True, index=True)
    job_id: str = Field(index=True, foreign_key="job.id")
    receipt_id: str = Field(index=True)
    payload: dict = Field(sa_column=Column(JSON, nullable=False))
    created_at: datetime = Field(default_factory=datetime.utcnow, index=True)

25 apps/coordinator-api/src/app/domain/miner.py Normal file
@@ -0,0 +1,25 @@
from __future__ import annotations

from datetime import datetime
from typing import Optional

from sqlalchemy import Column, JSON
from sqlmodel import Field, SQLModel


class Miner(SQLModel, table=True):
    id: str = Field(primary_key=True, index=True)
    region: Optional[str] = Field(default=None, index=True)
    capabilities: dict = Field(default_factory=dict, sa_column=Column(JSON, nullable=False))
    concurrency: int = Field(default=1)
    status: str = Field(default="ONLINE", index=True)
    inflight: int = Field(default=0)
    extra_metadata: dict = Field(default_factory=dict, sa_column=Column(JSON, nullable=False))
    last_heartbeat: datetime = Field(default_factory=datetime.utcnow, index=True)
    session_token: Optional[str] = None
    last_job_at: Optional[datetime] = Field(default=None, index=True)
    jobs_completed: int = Field(default=0)
    jobs_failed: int = Field(default=0)
    total_job_duration_ms: int = Field(default=0)
    average_job_duration_ms: float = Field(default=0.0)
    last_receipt_id: Optional[str] = Field(default=None, index=True)

34 apps/coordinator-api/src/app/main.py Normal file
@@ -0,0 +1,34 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .config import settings
from .routers import client, miner, admin


def create_app() -> FastAPI:
    app = FastAPI(
        title="AITBC Coordinator API",
        version="0.1.0",
        description="Stage 1 coordinator service handling job orchestration between clients and miners.",
    )

    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.allow_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(client.router, prefix="/v1")
    app.include_router(miner.router, prefix="/v1")
    app.include_router(admin.router, prefix="/v1")

    @app.get("/v1/health", tags=["health"], summary="Service healthcheck")
    async def health() -> dict[str, str]:
        return {"status": "ok", "env": settings.app_env}

    return app


app = create_app()

78 apps/coordinator-api/src/app/models.py Normal file
@@ -0,0 +1,78 @@
from __future__ import annotations

from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field


class JobState(str, Enum):
    queued = "QUEUED"
    running = "RUNNING"
    completed = "COMPLETED"
    failed = "FAILED"
    canceled = "CANCELED"
    expired = "EXPIRED"


class Constraints(BaseModel):
    gpu: Optional[str] = None
    cuda: Optional[str] = None
    min_vram_gb: Optional[int] = None
    models: Optional[list[str]] = None
    region: Optional[str] = None
    max_price: Optional[float] = None


class JobCreate(BaseModel):
    payload: Dict[str, Any]
    constraints: Constraints = Field(default_factory=Constraints)
    ttl_seconds: int = 900


class JobView(BaseModel):
    job_id: str
    state: JobState
    assigned_miner_id: Optional[str] = None
    requested_at: datetime
    expires_at: datetime
    error: Optional[str] = None


class JobResult(BaseModel):
    result: Optional[Dict[str, Any]] = None
    receipt: Optional[Dict[str, Any]] = None


class MinerRegister(BaseModel):
    capabilities: Dict[str, Any]
    concurrency: int = 1
    region: Optional[str] = None


class MinerHeartbeat(BaseModel):
    inflight: int = 0
    status: str = "ONLINE"
    metadata: Dict[str, Any] = Field(default_factory=dict)


class PollRequest(BaseModel):
    max_wait_seconds: int = 15


class AssignedJob(BaseModel):
    job_id: str
    payload: Dict[str, Any]
    constraints: Constraints


class JobResultSubmit(BaseModel):
    result: Dict[str, Any]
    metrics: Dict[str, Any] = Field(default_factory=dict)


class JobFailSubmit(BaseModel):
    error_code: str
    error_message: str
    metrics: Dict[str, Any] = Field(default_factory=dict)

1 apps/coordinator-api/src/app/routers/__init__.py Normal file
@@ -0,0 +1 @@
"""Router modules for the coordinator API."""

69 apps/coordinator-api/src/app/routers/admin.py Normal file
@@ -0,0 +1,69 @@
from fastapi import APIRouter, Depends
from sqlmodel import func, select

from ..deps import require_admin_key
from ..services import JobService, MinerService
from ..storage import SessionDep

router = APIRouter(prefix="/admin", tags=["admin"])


@router.get("/stats", summary="Get coordinator stats")
async def get_stats(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, int]:  # type: ignore[arg-type]
    service = JobService(session)
    from ..domain import Job

    total_jobs = session.exec(select(func.count()).select_from(Job)).one()
    active_jobs = session.exec(select(func.count()).select_from(Job).where(Job.state.in_(["QUEUED", "RUNNING"]))).one()

    miner_service = MinerService(session)
    miners = miner_service.list_records()
    avg_job_duration = (
        sum(miner.average_job_duration_ms for miner in miners if miner.average_job_duration_ms) / max(len(miners), 1)
    )
    return {
        "total_jobs": int(total_jobs or 0),
        "active_jobs": int(active_jobs or 0),
        "online_miners": miner_service.online_count(),
        "avg_miner_job_duration_ms": avg_job_duration,
    }


@router.get("/jobs", summary="List jobs")
async def list_jobs(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]:  # type: ignore[arg-type]
    from ..domain import Job

    jobs = session.exec(select(Job).order_by(Job.requested_at.desc()).limit(100)).all()
    return {
        "items": [
            {
                "job_id": job.id,
                "state": job.state,
                "client_id": job.client_id,
                "assigned_miner_id": job.assigned_miner_id,
                "requested_at": job.requested_at.isoformat(),
            }
            for job in jobs
        ]
    }


@router.get("/miners", summary="List miners")
async def list_miners(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]:  # type: ignore[arg-type]
    miner_service = MinerService(session)
    miners = [
        {
            "miner_id": record.id,
            "status": record.status,
            "inflight": record.inflight,
            "concurrency": record.concurrency,
            "region": record.region,
            "last_heartbeat": record.last_heartbeat.isoformat(),
            "average_job_duration_ms": record.average_job_duration_ms,
            "jobs_completed": record.jobs_completed,
            "jobs_failed": record.jobs_failed,
            "last_receipt_id": record.last_receipt_id,
        }
        for record in miner_service.list_records()
    ]
    return {"items": miners}

97 apps/coordinator-api/src/app/routers/client.py Normal file
@@ -0,0 +1,97 @@
from fastapi import APIRouter, Depends, HTTPException, status

from ..deps import require_client_key
from ..models import JobCreate, JobResult, JobState, JobView
from ..services import JobService
from ..storage import SessionDep

router = APIRouter(tags=["client"])


@router.post("/jobs", response_model=JobView, status_code=status.HTTP_201_CREATED, summary="Submit a job")
async def submit_job(
    req: JobCreate,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    service = JobService(session)
    job = service.create_job(client_id, req)
    return service.to_view(job)


@router.get("/jobs/{job_id}", response_model=JobView, summary="Get job status")
async def get_job(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")
    return service.to_view(job)


@router.get("/jobs/{job_id}/result", response_model=JobResult, summary="Get job result")
async def get_job_result(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobResult:  # type: ignore[arg-type]
    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    if job.state not in {JobState.completed, JobState.failed, JobState.canceled, JobState.expired}:
        raise HTTPException(status_code=status.HTTP_425_TOO_EARLY, detail="job not ready")
    if job.result is None and job.receipt is None:
        raise HTTPException(status_code=status.HTTP_425_TOO_EARLY, detail="job not ready")
    return service.to_result(job)


@router.post("/jobs/{job_id}/cancel", response_model=JobView, summary="Cancel job")
async def cancel_job(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    if job.state not in {JobState.queued, JobState.running}:
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="job not cancelable")

    job = service.cancel_job(job)
    return service.to_view(job)


@router.get("/jobs/{job_id}/receipt", summary="Get latest signed receipt")
async def get_job_receipt(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> dict:  # type: ignore[arg-type]
    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")
    if not job.receipt:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="receipt not available")
    return job.receipt


@router.get("/jobs/{job_id}/receipts", summary="List signed receipts")
async def list_job_receipts(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> dict:  # type: ignore[arg-type]
    service = JobService(session)
    receipts = service.list_receipts(job_id, client_id=client_id)
    return {"items": [row.payload for row in receipts]}

110 apps/coordinator-api/src/app/routers/miner.py Normal file
@@ -0,0 +1,110 @@
from datetime import datetime
from typing import Any

from fastapi import APIRouter, Depends, HTTPException, Response, status

from ..deps import require_miner_key
from ..models import AssignedJob, JobFailSubmit, JobResultSubmit, JobState, MinerHeartbeat, MinerRegister, PollRequest
from ..services import JobService, MinerService
from ..services.receipts import ReceiptService
from ..storage import SessionDep

router = APIRouter(tags=["miner"])


@router.post("/miners/register", summary="Register or update miner")
async def register(
    req: MinerRegister,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, Any]:  # type: ignore[arg-type]
    service = MinerService(session)
    record = service.register(miner_id, req)
    return {"status": "ok", "session_token": record.session_token}


@router.post("/miners/heartbeat", summary="Send miner heartbeat")
async def heartbeat(
    req: MinerHeartbeat,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, str]:  # type: ignore[arg-type]
    try:
        MinerService(session).heartbeat(miner_id, req)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="miner not registered")
    return {"status": "ok"}


# NOTE: until scheduling is fully implemented the poll endpoint performs a simple FIFO assignment.
@router.post("/miners/poll", response_model=AssignedJob, summary="Poll for next job")
async def poll(
    req: PollRequest,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> AssignedJob | Response:  # type: ignore[arg-type]
    job = MinerService(session).poll(miner_id, req.max_wait_seconds)
    if job is None:
        return Response(status_code=status.HTTP_204_NO_CONTENT)
    return job


@router.post("/miners/{job_id}/result", summary="Submit job result")
async def submit_result(
    job_id: str,
    req: JobResultSubmit,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, Any]:  # type: ignore[arg-type]
    job_service = JobService(session)
    miner_service = MinerService(session)
    receipt_service = ReceiptService(session)
    try:
        job = job_service.get_job(job_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    job.result = req.result
    job.state = JobState.completed
    job.error = None

    metrics = dict(req.metrics or {})
    duration_ms = metrics.get("duration_ms")
    if duration_ms is None and job.requested_at:
        duration_ms = int((datetime.utcnow() - job.requested_at).total_seconds() * 1000)
    metrics["duration_ms"] = duration_ms

    receipt = receipt_service.create_receipt(job, miner_id, req.result, metrics)
    job.receipt = receipt
    job.receipt_id = receipt["receipt_id"] if receipt else None
    session.add(job)
    session.commit()
    miner_service.release(
        miner_id,
        success=True,
        duration_ms=duration_ms,
        receipt_id=receipt["receipt_id"] if receipt else None,
    )
    return {"status": "ok", "receipt": receipt}


@router.post("/miners/{job_id}/fail", summary="Submit job failure")
async def submit_failure(
    job_id: str,
    req: JobFailSubmit,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, str]:  # type: ignore[arg-type]
    job_service = JobService(session)
    miner_service = MinerService(session)
    try:
        job = job_service.get_job(job_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    job.state = JobState.failed
    job.error = f"{req.error_code}: {req.error_message}"
    job.assigned_miner_id = miner_id
    session.add(job)
    session.commit()
    miner_service.release(miner_id, success=False)
    return {"status": "ok"}

6 apps/coordinator-api/src/app/services/__init__.py Normal file
@@ -0,0 +1,6 @@
"""Service layer for coordinator business logic."""

from .jobs import JobService
from .miners import MinerService

__all__ = ["JobService", "MinerService"]

156 apps/coordinator-api/src/app/services/jobs.py Normal file
@@ -0,0 +1,156 @@
from __future__ import annotations

from datetime import datetime, timedelta
from typing import Optional

from sqlmodel import Session, select

from ..domain import Job, Miner, JobReceipt
from ..models import AssignedJob, Constraints, JobCreate, JobResult, JobState, JobView


class JobService:
    def __init__(self, session: Session):
        self.session = session

    def create_job(self, client_id: str, req: JobCreate) -> Job:
        ttl = max(req.ttl_seconds, 1)
        now = datetime.utcnow()
        job = Job(
            client_id=client_id,
            payload=req.payload,
            constraints=req.constraints.model_dump(exclude_none=True),
            ttl_seconds=ttl,
            requested_at=now,
            expires_at=now + timedelta(seconds=ttl),
        )
        self.session.add(job)
        self.session.commit()
        self.session.refresh(job)
        return job

    def get_job(self, job_id: str, client_id: Optional[str] = None) -> Job:
        query = select(Job).where(Job.id == job_id)
        if client_id:
            query = query.where(Job.client_id == client_id)
        job = self.session.exec(query).one_or_none()
        if not job:
            raise KeyError("job not found")
        return self._ensure_not_expired(job)

    def list_receipts(self, job_id: str, client_id: Optional[str] = None) -> list[JobReceipt]:
        job = self.get_job(job_id, client_id=client_id)
        receipts = self.session.exec(
            select(JobReceipt)
            .where(JobReceipt.job_id == job.id)
            .order_by(JobReceipt.created_at.asc())
        ).all()
        return receipts

    def cancel_job(self, job: Job) -> Job:
        if job.state not in {JobState.queued, JobState.running}:
            return job
        job.state = JobState.canceled
        job.error = "canceled by client"
        job.assigned_miner_id = None
        self.session.add(job)
        self.session.commit()
        self.session.refresh(job)
        return job

    def to_view(self, job: Job) -> JobView:
        return JobView(
            job_id=job.id,
            state=job.state,
            assigned_miner_id=job.assigned_miner_id,
            requested_at=job.requested_at,
            expires_at=job.expires_at,
            error=job.error,
        )

    def to_result(self, job: Job) -> JobResult:
        return JobResult(result=job.result, receipt=job.receipt)

    def to_assigned(self, job: Job) -> AssignedJob:
        constraints = Constraints(**job.constraints) if isinstance(job.constraints, dict) else Constraints()
        return AssignedJob(job_id=job.id, payload=job.payload, constraints=constraints)

    def acquire_next_job(self, miner: Miner) -> Optional[Job]:
        now = datetime.utcnow()
        statement = (
            select(Job)
            .where(Job.state == JobState.queued)
            .order_by(Job.requested_at.asc())
        )

        jobs = self.session.exec(statement).all()
        for job in jobs:
            job = self._ensure_not_expired(job)
            if job.state != JobState.queued:
                continue
            if job.expires_at <= now:
                continue
            if not self._satisfies_constraints(job, miner):
                continue
            job.state = JobState.running
            job.assigned_miner_id = miner.id
            self.session.add(job)
            self.session.commit()
            self.session.refresh(job)
            return job
        return None

    def _ensure_not_expired(self, job: Job) -> Job:
        if job.state == JobState.queued and job.expires_at <= datetime.utcnow():
            job.state = JobState.expired
            job.error = "job expired"
            self.session.add(job)
            self.session.commit()
            self.session.refresh(job)
        return job

    def _satisfies_constraints(self, job: Job, miner: Miner) -> bool:
        if not job.constraints:
            return True
        constraints = Constraints(**job.constraints)
        capabilities = miner.capabilities or {}

        # Region matching
        if constraints.region and constraints.region != miner.region:
            return False

        gpu_specs = capabilities.get("gpus", []) or []
        has_gpu = bool(gpu_specs)

        if constraints.gpu:
            if not has_gpu:
                return False
            names = [gpu.get("name") for gpu in gpu_specs]
            if constraints.gpu not in names:
                return False

        if constraints.min_vram_gb:
            required_mb = constraints.min_vram_gb * 1024
            if not any((gpu.get("memory_mb") or 0) >= required_mb for gpu in gpu_specs):
                return False

        if constraints.cuda:
            cuda_info = capabilities.get("cuda")
            if not cuda_info or constraints.cuda not in str(cuda_info):
                return False

        if constraints.models:
            available_models = capabilities.get("models", [])
            if not set(constraints.models).issubset(set(available_models)):
                return False

        if constraints.max_price is not None:
            price = capabilities.get("price")
            try:
                price_value = float(price)
            except (TypeError, ValueError):
                return False
            if price_value > constraints.max_price:
                return False

        return True

110 apps/coordinator-api/src/app/services/miners.py Normal file
@@ -0,0 +1,110 @@
from __future__ import annotations

from datetime import datetime
from typing import Optional
from uuid import uuid4

from sqlmodel import Session, select

from ..domain import Miner
from ..models import AssignedJob, MinerHeartbeat, MinerRegister
from .jobs import JobService


class MinerService:
    def __init__(self, session: Session):
        self.session = session

    def register(self, miner_id: str, payload: MinerRegister) -> Miner:
        miner = self.session.get(Miner, miner_id)
        session_token = uuid4().hex
        if miner is None:
            miner = Miner(
                id=miner_id,
                capabilities=payload.capabilities,
                concurrency=payload.concurrency,
                region=payload.region,
                session_token=session_token,
            )
            self.session.add(miner)
        else:
            miner.capabilities = payload.capabilities
            miner.concurrency = payload.concurrency
            miner.region = payload.region
            miner.session_token = session_token
        miner.last_heartbeat = datetime.utcnow()
        miner.status = "ONLINE"
        self.session.commit()
        self.session.refresh(miner)
        return miner

    def heartbeat(self, miner_id: str, payload: MinerHeartbeat | dict) -> Miner:
        if not isinstance(payload, MinerHeartbeat):
            payload = MinerHeartbeat.model_validate(payload)
        miner = self.session.get(Miner, miner_id)
        if miner is None:
            raise KeyError("miner not registered")
        miner.inflight = payload.inflight
        miner.status = payload.status
        miner.extra_metadata = payload.metadata
        miner.last_heartbeat = datetime.utcnow()
        self.session.add(miner)
        self.session.commit()
        self.session.refresh(miner)
        return miner

    def poll(self, miner_id: str, max_wait_seconds: int) -> Optional[AssignedJob]:
        miner = self.session.get(Miner, miner_id)
        if miner is None:
            raise KeyError("miner not registered")
        if miner.concurrency and miner.inflight >= miner.concurrency:
            return None

        job_service = JobService(self.session)
        job = job_service.acquire_next_job(miner)
        if not job:
            return None

        miner.inflight += 1
        miner.last_heartbeat = datetime.utcnow()
        miner.last_job_at = datetime.utcnow()
        self.session.add(miner)
        self.session.commit()
        return job_service.to_assigned(job)

    def release(
        self,
        miner_id: str,
        success: bool | None = None,
        duration_ms: int | None = None,
        receipt_id: str | None = None,
    ) -> None:
        miner = self.session.get(Miner, miner_id)
        if miner:
            miner.inflight = max(0, miner.inflight - 1)
            if success is True:
                miner.jobs_completed += 1
                if duration_ms is not None:
                    miner.total_job_duration_ms += duration_ms
                    miner.average_job_duration_ms = (
                        miner.total_job_duration_ms / max(miner.jobs_completed, 1)
                    )
            elif success is False:
                miner.jobs_failed += 1
            if receipt_id:
                miner.last_receipt_id = receipt_id
            self.session.add(miner)
            self.session.commit()

    def get(self, miner_id: str) -> Miner:
        miner = self.session.get(Miner, miner_id)
        if miner is None:
            raise KeyError("miner not registered")
        return miner

    def list_records(self) -> list[Miner]:
        return list(self.session.exec(select(Miner)).all())

    def online_count(self) -> int:
        result = self.session.exec(select(Miner).where(Miner.status == "ONLINE"))
        return len(result.all())

79 apps/coordinator-api/src/app/services/receipts.py Normal file
@@ -0,0 +1,79 @@
from __future__ import annotations

from typing import Any, Dict, Optional
from secrets import token_hex
from datetime import datetime

from aitbc_crypto.signing import ReceiptSigner

from sqlmodel import Session

from ..config import settings
from ..domain import Job, JobReceipt


class ReceiptService:
    def __init__(self, session: Session) -> None:
        self.session = session
        self._signer: Optional[ReceiptSigner] = None
        self._attestation_signer: Optional[ReceiptSigner] = None
        if settings.receipt_signing_key_hex:
            key_bytes = bytes.fromhex(settings.receipt_signing_key_hex)
            self._signer = ReceiptSigner(key_bytes)
        if settings.receipt_attestation_key_hex:
            attest_bytes = bytes.fromhex(settings.receipt_attestation_key_hex)
            self._attestation_signer = ReceiptSigner(attest_bytes)

    def create_receipt(
        self,
        job: Job,
        miner_id: str,
        job_result: Dict[str, Any] | None,
        result_metrics: Dict[str, Any] | None,
    ) -> Dict[str, Any] | None:
        if self._signer is None:
            return None
        payload = {
            "version": "1.0",
            "receipt_id": token_hex(16),
            "job_id": job.id,
            "provider": miner_id,
            "client": job.client_id,
            "units": _first_present([
                (result_metrics or {}).get("units"),
                (job_result or {}).get("units"),
            ], default=0.0),
            "unit_type": _first_present([
                (result_metrics or {}).get("unit_type"),
                (job_result or {}).get("unit_type"),
            ], default="gpu_seconds"),
            "price": _first_present([
                (result_metrics or {}).get("price"),
                (job_result or {}).get("price"),
            ]),
            "started_at": int(job.requested_at.timestamp()) if job.requested_at else int(datetime.utcnow().timestamp()),
            "completed_at": int(datetime.utcnow().timestamp()),
            "metadata": {
                "job_payload": job.payload,
                "job_constraints": job.constraints,
                "result": job_result,
                "metrics": result_metrics,
            },
        }
        payload["signature"] = self._signer.sign(payload)
        if self._attestation_signer:
            payload.setdefault("attestations", [])
            attestation_payload = dict(payload)
            attestation_payload.pop("attestations", None)
            attestation_payload.pop("signature", None)
            payload["attestations"].append(self._attestation_signer.sign(attestation_payload))
        receipt_row = JobReceipt(job_id=job.id, receipt_id=payload["receipt_id"], payload=payload)
        self.session.add(receipt_row)
        return payload


def _first_present(values: list[Optional[Any]], default: Optional[Any] = None) -> Optional[Any]:
    for value in values:
        if value is not None:
            return value
    return default

5 apps/coordinator-api/src/app/storage/__init__.py Normal file
@@ -0,0 +1,5 @@
"""Persistence helpers for the coordinator API."""

from .db import SessionDep, get_session, init_db

__all__ = ["SessionDep", "get_session", "init_db"]

42 apps/coordinator-api/src/app/storage/db.py Normal file
@@ -0,0 +1,42 @@
from __future__ import annotations

from contextlib import contextmanager
from typing import Annotated, Generator

from fastapi import Depends
from sqlalchemy.engine import Engine
from sqlmodel import Session, SQLModel, create_engine

from ..config import settings
from ..domain import Job, JobReceipt, Miner  # imported so their tables register with SQLModel metadata

_engine: Engine | None = None


def get_engine() -> Engine:
    global _engine

    if _engine is None:
        connect_args = {"check_same_thread": False} if settings.database_url.startswith("sqlite") else {}
        _engine = create_engine(settings.database_url, echo=False, connect_args=connect_args)
    return _engine


def init_db() -> None:
    engine = get_engine()
    SQLModel.metadata.create_all(engine)


@contextmanager
def session_scope() -> Generator[Session, None, None]:
    engine = get_engine()
    with Session(engine) as session:
        yield session


def get_session() -> Generator[Session, None, None]:
    with session_scope() as session:
        yield session


SessionDep = Annotated[Session, Depends(get_session)]

77 apps/coordinator-api/tests/test_client_receipts.py Normal file
@@ -0,0 +1,77 @@
import pytest
from fastapi.testclient import TestClient
from nacl.signing import SigningKey

from app.main import create_app
from app.models import JobCreate, MinerRegister, JobResultSubmit
from app.storage.db import init_db
from app.config import settings


@pytest.fixture(scope="module", autouse=True)
def test_client(tmp_path_factory):
    db_file = tmp_path_factory.mktemp("data") / "client_receipts.db"
    settings.database_url = f"sqlite:///{db_file}"
    init_db()
    app = create_app()
    with TestClient(app) as client:
        yield client


def test_receipt_endpoint_returns_signed_receipt(test_client: TestClient):
    signing_key = SigningKey.generate()
    settings.receipt_signing_key_hex = signing_key.encode().hex()

    # register miner
    resp = test_client.post(
        "/v1/miners/register",
        json={"capabilities": {"price": 1}, "concurrency": 1},
        headers={"X-Api-Key": "miner_dev_key_1"},
    )
    assert resp.status_code == 200

    # submit job
    job_payload = {
        "payload": {"task": "receipt"},
    }
    resp = test_client.post(
        "/v1/jobs",
        json=job_payload,
        headers={"X-Api-Key": "client_dev_key_1"},
    )
    assert resp.status_code == 201
    job_id = resp.json()["job_id"]

    # poll for job assignment
    poll_resp = test_client.post(
        "/v1/miners/poll",
        json={"max_wait_seconds": 1},
        headers={"X-Api-Key": "miner_dev_key_1"},
    )
    assert poll_resp.status_code in (200, 204)

    # submit result
    result_payload = {
        "result": {"units": 1, "unit_type": "gpu_seconds", "price": 1},
        "metrics": {"units": 1, "duration_ms": 500},
    }
    result_resp = test_client.post(
        f"/v1/miners/{job_id}/result",
        json=result_payload,
        headers={"X-Api-Key": "miner_dev_key_1"},
    )
    assert result_resp.status_code == 200
    signed_receipt = result_resp.json()["receipt"]
    assert signed_receipt["signature"]["alg"] == "Ed25519"

    # fetch receipt via client endpoint
    receipt_resp = test_client.get(
        f"/v1/jobs/{job_id}/receipt",
        headers={"X-Api-Key": "client_dev_key_1"},
    )
    assert receipt_resp.status_code == 200
    payload = receipt_resp.json()
    assert payload["receipt_id"] == signed_receipt["receipt_id"]
    assert payload["signature"]["alg"] == "Ed25519"

    settings.receipt_signing_key_hex = None

57 apps/coordinator-api/tests/test_jobs.py Normal file
@@ -0,0 +1,57 @@
import pytest
from sqlmodel import Session, delete

from app.domain import Job, Miner
from app.models import JobCreate
from app.services.jobs import JobService
from app.storage.db import init_db, session_scope


@pytest.fixture(scope="module", autouse=True)
def _init_db(tmp_path_factory):
    db_file = tmp_path_factory.mktemp("data") / "test.db"
    # override settings dynamically
    from app.config import settings

    settings.database_url = f"sqlite:///{db_file}"
    init_db()
    yield


@pytest.fixture()
def session():
    with session_scope() as sess:
        sess.exec(delete(Job))
        sess.exec(delete(Miner))
        sess.commit()
        yield sess


def test_create_and_fetch_job(session: Session):
    svc = JobService(session)
    job = svc.create_job("client1", JobCreate(payload={"task": "noop"}))
    fetched = svc.get_job(job.id, client_id="client1")
    assert fetched.id == job.id
    assert fetched.payload["task"] == "noop"


def test_acquire_next_job(session: Session):
    svc = JobService(session)
    job1 = svc.create_job("client1", JobCreate(payload={"n": 1}))
    job2 = svc.create_job("client1", JobCreate(payload={"n": 2}))

    miner = Miner(id="miner1", capabilities={}, concurrency=1)
    session.add(miner)
    session.commit()

    next_job = svc.acquire_next_job(miner)
    assert next_job is not None
    assert next_job.id == job1.id
    assert next_job.state == "RUNNING"

    next_job2 = svc.acquire_next_job(miner)
    assert next_job2 is not None
    assert next_job2.id == job2.id

    # No more jobs
    assert svc.acquire_next_job(miner) is None

258 apps/coordinator-api/tests/test_miner_service.py Normal file
@@ -0,0 +1,258 @@
import pytest
from sqlmodel import Session
from nacl.signing import SigningKey

from aitbc_crypto.signing import ReceiptVerifier

from app.models import MinerRegister, JobCreate, Constraints
from app.services.jobs import JobService
from app.services.miners import MinerService
from app.services.receipts import ReceiptService
from app.storage.db import init_db, session_scope
from app.config import settings
from app.domain import JobReceipt
from sqlmodel import select


@pytest.fixture(scope="module", autouse=True)
def _init_db(tmp_path_factory):
    db_file = tmp_path_factory.mktemp("data") / "miner.db"

    settings.database_url = f"sqlite:///{db_file}"
    init_db()
    yield


@pytest.fixture()
def session():
    with session_scope() as sess:
        yield sess


def test_register_and_poll_inflight(session: Session):
    miner_service = MinerService(session)
    job_service = JobService(session)

    miner_service.register(
        "miner-1",
        MinerRegister(
            capabilities={"gpu": False},
            concurrency=1,
        ),
    )

    job_service.create_job("client-a", JobCreate(payload={"task": "demo"}))
    assigned = miner_service.poll("miner-1", max_wait_seconds=1)
    assert assigned is not None

    miner = miner_service.get("miner-1")
    assert miner.inflight == 1

    miner_service.release("miner-1")
    miner = miner_service.get("miner-1")
    assert miner.inflight == 0


def test_heartbeat_updates_metadata(session: Session):
    miner_service = MinerService(session)

    miner_service.register(
        "miner-2",
        MinerRegister(
            capabilities={"gpu": True},
            concurrency=2,
        ),
    )

    miner_service.heartbeat(
        "miner-2",
        payload=dict(inflight=3, status="BUSY", metadata={"load": 0.9}),
    )

    miner = miner_service.get("miner-2")
    assert miner.status == "BUSY"
    assert miner.inflight == 3
    assert miner.extra_metadata.get("load") == 0.9


def test_capability_constrained_assignment(session: Session):
    miner_service = MinerService(session)
    job_service = JobService(session)

    miner = miner_service.register(
        "miner-cap",
        MinerRegister(
            capabilities={
                "gpus": [{"name": "NVIDIA RTX 4090", "memory_mb": 24576}],
                "models": ["stable-diffusion", "llama"],
            },
            concurrency=1,
            region="eu-west",
        ),
    )

    job_service.create_job(
        "client-x",
        JobCreate(
            payload={"task": "render"},
            constraints=Constraints(region="us-east"),
        ),
    )
    job_service.create_job(
        "client-x",
        JobCreate(
            payload={"task": "render-hf"},
            constraints=Constraints(
                region="eu-west",
                gpu="NVIDIA RTX 4090",
                min_vram_gb=12,
                models=["stable-diffusion"],
            ),
        ),
    )

    assigned = miner_service.poll("miner-cap", max_wait_seconds=1)
    assert assigned is not None
    assert assigned.job_id is not None
    assert assigned.payload["task"] == "render-hf"

    miner_state = miner_service.get("miner-cap")
    assert miner_state.inflight == 1

    miner_service.release("miner-cap")


def test_price_constraint(session: Session):
    miner_service = MinerService(session)
    job_service = JobService(session)

    miner_service.register(
        "miner-price",
        MinerRegister(
            capabilities={
                "gpus": [{"name": "NVIDIA RTX 3070", "memory_mb": 8192}],
                "models": [],
                "price": 3.5,
            },
            concurrency=1,
        ),
    )

    job_service.create_job(
        "client-y",
        JobCreate(
            payload={"task": "cheap"},
            constraints=Constraints(max_price=2.0),
        ),
    )
    job_service.create_job(
        "client-y",
        JobCreate(
            payload={"task": "fair"},
            constraints=Constraints(max_price=4.0),
        ),
    )

    assigned = miner_service.poll("miner-price", max_wait_seconds=1)
    assert assigned is not None
    assert assigned.payload["task"] == "fair"

    miner_service.release("miner-price")


def test_receipt_signing(session: Session):
    signing_key = SigningKey.generate()
    settings.receipt_signing_key_hex = signing_key.encode().hex()

    job_service = JobService(session)
    miner_service = MinerService(session)
    receipt_service = ReceiptService(session)

    miner_service.register(
        "miner-r",
        MinerRegister(
            capabilities={"price": 1.0},
            concurrency=1,
        ),
    )

    job = job_service.create_job(
        "client-r",
        JobCreate(payload={"task": "sign"}),
    )

    receipt = receipt_service.create_receipt(
        job,
        "miner-r",
        {"units": 1.0, "unit_type": "gpu_seconds", "price": 1.2},
        {"units": 1.0},
    )

    assert receipt is not None
    signature = receipt.get("signature")
    assert signature is not None
    assert signature["alg"] == "Ed25519"

    miner_service.release("miner-r", success=True, duration_ms=500, receipt_id=receipt["receipt_id"])
    miner_state = miner_service.get("miner-r")
    assert miner_state.jobs_completed == 1
    assert miner_state.total_job_duration_ms == 500
    assert miner_state.average_job_duration_ms == 500
    assert miner_state.last_receipt_id == receipt["receipt_id"]

    verifier = ReceiptVerifier(signing_key.verify_key.encode())
    payload = {k: v for k, v in receipt.items() if k not in {"signature", "attestations"}}
    assert verifier.verify(payload, receipt["signature"]) is True

    # Reset signing key for subsequent tests
    settings.receipt_signing_key_hex = None


def test_receipt_signing_with_attestation(session: Session):
    signing_key = SigningKey.generate()
    attest_key = SigningKey.generate()
    settings.receipt_signing_key_hex = signing_key.encode().hex()
    settings.receipt_attestation_key_hex = attest_key.encode().hex()

    job_service = JobService(session)
    miner_service = MinerService(session)
    receipt_service = ReceiptService(session)

    miner_service.register(
        "miner-attest",
        MinerRegister(capabilities={"price": 1.0}, concurrency=1),
    )

    job = job_service.create_job(
        "client-attest",
        JobCreate(payload={"task": "attest"}),
    )

    receipt = receipt_service.create_receipt(
        job,
        "miner-attest",
        {"units": 1.0, "unit_type": "gpu_seconds", "price": 2.0},
        {"units": 1.0},
    )

    assert receipt is not None
    assert receipt.get("signature") is not None
    attestations = receipt.get("attestations")
    assert attestations is not None and len(attestations) == 1

    stored_receipts = session.exec(select(JobReceipt).where(JobReceipt.job_id == job.id)).all()
    assert len(stored_receipts) == 1
    assert stored_receipts[0].receipt_id == receipt["receipt_id"]

    payload = {k: v for k, v in receipt.items() if k not in {"signature", "attestations"}}

    miner_verifier = ReceiptVerifier(signing_key.verify_key.encode())
    assert miner_verifier.verify(payload, receipt["signature"]) is True

    attest_verifier = ReceiptVerifier(attest_key.verify_key.encode())
    assert attest_verifier.verify(payload, attestations[0]) is True

    settings.receipt_signing_key_hex = None
    settings.receipt_attestation_key_hex = None