chore: initialize monorepo with project scaffolding, configs, and CI setup
This commit is contained in:
1
apps/coordinator-api/src/app/routers/__init__.py
Normal file
1
apps/coordinator-api/src/app/routers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Router modules for the coordinator API."""
|
||||
69
apps/coordinator-api/src/app/routers/admin.py
Normal file
69
apps/coordinator-api/src/app/routers/admin.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
|
||||
from ..deps import require_admin_key
|
||||
from ..services import JobService, MinerService
|
||||
from ..storage import SessionDep
|
||||
|
||||
# Router collecting the admin endpoints; every route registered on it is
# served under the "/admin" prefix and tagged "admin".
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
|
||||
|
||||
@router.get("/stats", summary="Get coordinator stats")
async def get_stats(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, int | float]:  # type: ignore[arg-type]
    # Return aggregate coordinator statistics.
    #
    # Parameters:
    #   session:   database session injected by FastAPI.
    #   admin_key: result of the require_admin_key() dependency; the body does
    #              not read it.
    #
    # Returns a dict with:
    #   total_jobs                - row count of Job.
    #   active_jobs               - row count of Job whose state is QUEUED or RUNNING.
    #   online_miners             - MinerService.online_count().
    #   avg_miner_job_duration_ms - mean of average_job_duration_ms over the
    #                               miners that report a non-zero value, or 0.0
    #                               when none do.
    #
    # The return annotation is int | float because the average is a float;
    # annotating it as int makes a response model built from the annotation
    # truncate or reject the value.
    from sqlmodel import func, select

    from ..domain import Job

    count_all = select(func.count()).select_from(Job)
    total_jobs = session.exec(count_all).one()
    active_jobs = session.exec(count_all.where(Job.state.in_(["QUEUED", "RUNNING"]))).one()

    miner_service = MinerService(session)
    # Only miners that actually report a duration take part in the mean, so the
    # divisor must count those miners and not every registered miner.
    durations = [
        miner.average_job_duration_ms
        for miner in miner_service.list_records()
        if miner.average_job_duration_ms
    ]
    avg_job_duration = sum(durations) / len(durations) if durations else 0.0

    return {
        "total_jobs": int(total_jobs or 0),
        "active_jobs": int(active_jobs or 0),
        "online_miners": miner_service.online_count(),
        "avg_miner_job_duration_ms": avg_job_duration,
    }
|
||||
|
||||
|
||||
@router.get("/jobs", summary="List jobs")
async def list_jobs(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]:  # type: ignore[arg-type]
    # List at most 100 jobs, ordered by requested_at descending.
    #
    # Parameters:
    #   session:   database session injected by FastAPI.
    #   admin_key: result of the require_admin_key() dependency; the body does
    #              not read it.
    #
    # Returns {"items": [...]}, one dict per job with the keys job_id, state,
    # client_id, assigned_miner_id and requested_at (ISO-8601 string).
    #
    # `select` must be imported here: the module never imports it at top level
    # (only get_stats imports it, in its own function scope), so without this
    # line the query below raises NameError.
    from sqlmodel import select

    from ..domain import Job

    query = select(Job).order_by(Job.requested_at.desc()).limit(100)
    jobs = session.exec(query).all()
    return {
        "items": [
            {
                "job_id": job.id,
                "state": job.state,
                "client_id": job.client_id,
                "assigned_miner_id": job.assigned_miner_id,
                "requested_at": job.requested_at.isoformat(),
            }
            for job in jobs
        ]
    }
|
||||
|
||||
|
||||
@router.get("/miners", summary="List miners")
async def list_miners(session: SessionDep, admin_key: str = Depends(require_admin_key())) -> dict[str, list[dict]]:  # type: ignore[arg-type]
    # Report every miner record returned by MinerService.list_records().
    #
    # Parameters:
    #   session:   database session injected by FastAPI.
    #   admin_key: result of the require_admin_key() dependency; the body does
    #              not read it.
    #
    # Returns {"items": [...]}, one dict per record; last_heartbeat is
    # rendered with isoformat(), all other fields are copied as-is.
    records = MinerService(session).list_records()

    items: list[dict] = []
    for rec in records:
        items.append(
            {
                "miner_id": rec.miner_id,
                "status": rec.status,
                "inflight": rec.inflight,
                "concurrency": rec.concurrency,
                "region": rec.region,
                "last_heartbeat": rec.last_heartbeat.isoformat(),
                "average_job_duration_ms": rec.average_job_duration_ms,
                "jobs_completed": rec.jobs_completed,
                "jobs_failed": rec.jobs_failed,
                "last_receipt_id": rec.last_receipt_id,
            }
        )
    return {"items": items}
|
||||
97
apps/coordinator-api/src/app/routers/client.py
Normal file
97
apps/coordinator-api/src/app/routers/client.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
|
||||
from ..deps import require_client_key
|
||||
from ..models import JobCreate, JobView, JobResult
|
||||
from ..services import JobService
|
||||
from ..storage import SessionDep
|
||||
|
||||
# Router collecting the client endpoints (tagged "client"); no path prefix is
# set here.
router = APIRouter(tags=["client"])
|
||||
|
||||
@router.post("/jobs", response_model=JobView, status_code=status.HTTP_201_CREATED, summary="Submit a job")
async def submit_job(
    req: JobCreate,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    # Create a job for the calling client and return its view (HTTP 201).
    #
    # Parameters:
    #   req:       job creation payload.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency.
    jobs = JobService(session)
    created = jobs.create_job(client_id, req)
    view = jobs.to_view(created)
    return view
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}", response_model=JobView, summary="Get job status")
async def get_job(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    # Look up one job on behalf of the calling client and return its view.
    #
    # Parameters:
    #   job_id:    path parameter naming the job.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency;
    #              forwarded to JobService.get_job.
    #
    # Raises HTTPException(404, "job not found") when JobService.get_job
    # raises KeyError.
    jobs = JobService(session)
    try:
        found = jobs.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")
    else:
        return jobs.to_view(found)
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}/result", response_model=JobResult, summary="Get job result")
async def get_job_result(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobResult:  # type: ignore[arg-type]
    # Return the result of a job once it has reached a terminal state.
    #
    # Parameters:
    #   job_id:    path parameter naming the job.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency;
    #              forwarded to JobService.get_job.
    #
    # Raises:
    #   HTTPException(404, "job not found") when JobService.get_job raises KeyError.
    #   HTTPException(425, "job not ready") when the job state is not one of
    #       completed / failed / canceled / expired, or when the job has
    #       neither a result nor a receipt.
    #
    # JobState is imported here because this module's top-level imports do not
    # include it; without the import the state check raises NameError.
    from ..models import JobState

    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    terminal_states = {JobState.completed, JobState.failed, JobState.canceled, JobState.expired}
    if job.state not in terminal_states:
        raise HTTPException(status_code=status.HTTP_425_TOO_EARLY, detail="job not ready")
    # NOTE(review): a failed/canceled/expired job with no result and no receipt
    # answers 425 indefinitely - confirm that is intended.
    if job.result is None and job.receipt is None:
        raise HTTPException(status_code=status.HTTP_425_TOO_EARLY, detail="job not ready")
    return service.to_result(job)
|
||||
|
||||
|
||||
@router.post("/jobs/{job_id}/cancel", response_model=JobView, summary="Cancel job")
async def cancel_job(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> JobView:  # type: ignore[arg-type]
    # Cancel a job that is still queued or running and return its updated view.
    #
    # Parameters:
    #   job_id:    path parameter naming the job.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency;
    #              forwarded to JobService.get_job.
    #
    # Raises:
    #   HTTPException(404, "job not found") when JobService.get_job raises KeyError.
    #   HTTPException(409, "job not cancelable") when the job state is neither
    #       queued nor running.
    #
    # JobState is imported here because this module's top-level imports do not
    # include it; without the import the state check raises NameError.
    from ..models import JobState

    service = JobService(session)
    try:
        job = service.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    if job.state not in {JobState.queued, JobState.running}:
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="job not cancelable")

    job = service.cancel_job(job)
    return service.to_view(job)
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}/receipt", summary="Get latest signed receipt")
async def get_job_receipt(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> dict:  # type: ignore[arg-type]
    # Return the receipt stored on the job.
    #
    # Parameters:
    #   job_id:    path parameter naming the job.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency;
    #              forwarded to JobService.get_job.
    #
    # Raises:
    #   HTTPException(404, "job not found") when JobService.get_job raises KeyError.
    #   HTTPException(404, "receipt not available") when job.receipt is falsy.
    jobs = JobService(session)
    try:
        job = jobs.get_job(job_id, client_id=client_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    receipt = job.receipt
    if receipt:
        return receipt
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="receipt not available")
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}/receipts", summary="List signed receipts")
async def list_job_receipts(
    job_id: str,
    session: SessionDep,
    client_id: str = Depends(require_client_key()),
) -> dict:  # type: ignore[arg-type]
    # Return the payload of every receipt row that JobService.list_receipts
    # yields for this job and client, as {"items": [...]}.
    #
    # Parameters:
    #   job_id:    path parameter naming the job.
    #   session:   database session injected by FastAPI.
    #   client_id: value produced by the require_client_key() dependency;
    #              forwarded to JobService.list_receipts.
    #
    # NOTE(review): unlike the sibling job endpoints there is no KeyError -> 404
    # translation here; confirm how list_receipts signals an unknown job.
    rows = JobService(session).list_receipts(job_id, client_id=client_id)

    payloads = []
    for row in rows:
        payloads.append(row.payload)
    return {"items": payloads}
|
||||
110
apps/coordinator-api/src/app/routers/miner.py
Normal file
110
apps/coordinator-api/src/app/routers/miner.py
Normal file
@@ -0,0 +1,110 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
|
||||
from ..deps import require_miner_key
|
||||
from ..models import AssignedJob, JobFailSubmit, JobResultSubmit, JobState, MinerHeartbeat, MinerRegister, PollRequest
|
||||
from ..services import JobService, MinerService
|
||||
from ..services.receipts import ReceiptService
|
||||
from ..storage import SessionDep
|
||||
|
||||
# Router collecting the miner endpoints (tagged "miner"); no path prefix is
# set here.
router = APIRouter(tags=["miner"])
|
||||
|
||||
|
||||
@router.post("/miners/register", summary="Register or update miner")
async def register(
    req: MinerRegister,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, Any]:  # type: ignore[arg-type]
    # Register the calling miner through MinerService.register and hand back
    # the session token of the resulting record.
    #
    # Parameters:
    #   req:      registration payload.
    #   session:  database session injected by FastAPI.
    #   miner_id: value produced by the require_miner_key() dependency.
    #
    # Returns {"status": "ok", "session_token": <record.session_token>}.
    miners = MinerService(session)
    registered = miners.register(miner_id, req)
    token = registered.session_token
    return {"status": "ok", "session_token": token}
|
||||
|
||||
@router.post("/miners/heartbeat", summary="Send miner heartbeat")
async def heartbeat(
    req: MinerHeartbeat,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, str]:  # type: ignore[arg-type]
    # Record a heartbeat for the calling miner via MinerService.heartbeat.
    #
    # Parameters:
    #   req:      heartbeat payload.
    #   session:  database session injected by FastAPI.
    #   miner_id: value produced by the require_miner_key() dependency.
    #
    # Returns {"status": "ok"}.
    # Raises HTTPException(404, "miner not registered") when the service
    # raises KeyError.
    miners = MinerService(session)
    try:
        miners.heartbeat(miner_id, req)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="miner not registered")

    ack = {"status": "ok"}
    return ack
|
||||
|
||||
|
||||
# NOTE: until scheduling is fully implemented the poll endpoint performs a simple FIFO assignment.
|
||||
@router.post("/miners/poll", response_model=AssignedJob, summary="Poll for next job")
async def poll(
    req: PollRequest,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> AssignedJob | Response:  # type: ignore[arg-type]
    # Ask MinerService.poll for the next job for the calling miner.
    #
    # Parameters:
    #   req:      poll payload; only max_wait_seconds is read.
    #   session:  database session injected by FastAPI.
    #   miner_id: value produced by the require_miner_key() dependency.
    #
    # Returns the assigned job, or an empty 204 No Content response when the
    # service returns None.
    #
    # NOTE(review): MinerService.poll is called synchronously from an async
    # handler - confirm it does not block for max_wait_seconds.
    assigned = MinerService(session).poll(miner_id, req.max_wait_seconds)
    if assigned is not None:
        return assigned
    return Response(status_code=status.HTTP_204_NO_CONTENT)
|
||||
|
||||
|
||||
@router.post("/miners/{job_id}/result", summary="Submit job result")
async def submit_result(
    job_id: str,
    req: JobResultSubmit,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, Any]:  # type: ignore[arg-type]
    # Store a miner's result for a job, mark the job completed, create a
    # receipt, commit, and release the miner.
    #
    # Parameters:
    #   job_id:   path parameter naming the job.
    #   req:      result payload; req.result and req.metrics are read.
    #   session:  database session injected by FastAPI.
    #   miner_id: value produced by the require_miner_key() dependency.
    #
    # Returns {"status": "ok", "receipt": <receipt>}.
    # Raises HTTPException(404, "job not found") when JobService.get_job
    # raises KeyError.
    #
    # NOTE(review): nothing here checks that the job is assigned to miner_id or
    # that it is still running - confirm that is enforced elsewhere.
    jobs = JobService(session)
    miners = MinerService(session)
    receipts = ReceiptService(session)

    try:
        job = jobs.get_job(job_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    job.result = req.result
    job.state = JobState.completed
    job.error = None

    # Work on a copy so the request's own metrics mapping is left untouched.
    metrics = dict(req.metrics or {})
    duration_ms = metrics.get("duration_ms")
    if duration_ms is None and job.requested_at:
        # Fall back to the time elapsed since the job was requested.
        # NOTE(review): utcnow() is naive; presumably requested_at is naive UTC
        # as well - confirm.
        elapsed = datetime.utcnow() - job.requested_at
        duration_ms = int(elapsed.total_seconds() * 1000)
        metrics["duration_ms"] = duration_ms

    receipt = receipts.create_receipt(job, miner_id, req.result, metrics)
    job.receipt = receipt
    receipt_id = receipt["receipt_id"] if receipt else None
    job.receipt_id = receipt_id

    # Persist the job before the miner is released.
    session.add(job)
    session.commit()

    miners.release(miner_id, success=True, duration_ms=duration_ms, receipt_id=receipt_id)
    return {"status": "ok", "receipt": receipt}
|
||||
|
||||
|
||||
@router.post("/miners/{job_id}/fail", summary="Submit job failure")
async def submit_failure(
    job_id: str,
    req: JobFailSubmit,
    session: SessionDep,
    miner_id: str = Depends(require_miner_key()),
) -> dict[str, str]:  # type: ignore[arg-type]
    # Mark a job as failed on behalf of the calling miner, commit, and release
    # the miner with success=False.
    #
    # Parameters:
    #   job_id:   path parameter naming the job.
    #   req:      failure payload; error_code and error_message are read.
    #   session:  database session injected by FastAPI.
    #   miner_id: value produced by the require_miner_key() dependency; also
    #             written to job.assigned_miner_id.
    #
    # Returns {"status": "ok"}.
    # Raises HTTPException(404, "job not found") when JobService.get_job
    # raises KeyError.
    jobs = JobService(session)
    miners = MinerService(session)

    try:
        job = jobs.get_job(job_id)
    except KeyError:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="job not found")

    job.state = JobState.failed
    failure_text = f"{req.error_code}: {req.error_message}"
    job.error = failure_text
    job.assigned_miner_id = miner_id

    # Persist the job before the miner is released.
    session.add(job)
    session.commit()

    miners.release(miner_id, success=False)
    return {"status": "ok"}
|
||||
Reference in New Issue
Block a user