Files
aitbc/scripts/services/gpu/gpu_miner_host.py
aitbc e4f1a96172
Some checks failed
Blockchain Synchronization Verification / sync-verification (push) Failing after 8s
CLI Tests / test-cli (push) Successful in 10s
Contract Performance Benchmarks / benchmark-gas-usage (push) Successful in 1m22s
Contract Performance Benchmarks / benchmark-execution-time (push) Successful in 1m11s
Contract Performance Benchmarks / benchmark-throughput (push) Successful in 1m13s
Cross-Chain Functionality Tests / test-cross-chain-sync (push) Failing after 5s
Cross-Chain Functionality Tests / test-cross-chain-transactions (push) Successful in 5s
Cross-Chain Functionality Tests / test-cross-chain-bridge (push) Has been skipped
Cross-Chain Functionality Tests / test-multi-chain-consensus (push) Failing after 3s
Cross-Chain Functionality Tests / aggregate-results (push) Has been skipped
Cross-Node Transaction Testing / transaction-test (push) Successful in 5s
Deploy to Testnet / deploy-testnet (push) Successful in 1m14s
Contract Performance Benchmarks / compare-benchmarks (push) Has been cancelled
Documentation Validation / validate-docs (push) Failing after 10s
Multi-Node Stress Testing / stress-test (push) Has been cancelled
Node Failover Simulation / failover-test (push) Has been cancelled
Security Scanning / security-scan (push) Has been cancelled
Smart Contract Tests / test-solidity (map[name:aitbc-contracts path:contracts]) (push) Has been cancelled
Smart Contract Tests / test-solidity (map[name:aitbc-token path:packages/solidity/aitbc-token]) (push) Has been cancelled
Smart Contract Tests / test-foundry (push) Has been cancelled
Smart Contract Tests / lint-solidity (push) Has been cancelled
Smart Contract Tests / deploy-contracts (push) Has been cancelled
Documentation Validation / validate-policies-strict (push) Successful in 3s
Integration Tests / test-service-integration (push) Failing after 45s
Multi-Chain Island Architecture Tests / test-multi-chain-island (push) Failing after 2s
Multi-Node Blockchain Health Monitoring / health-check (push) Successful in 5s
P2P Network Verification / p2p-verification (push) Successful in 3s
Production Tests / Production Integration Tests (push) Failing after 7s
Python Tests / test-python (push) Failing after 46s
Staking Tests / test-staking-service (push) Failing after 2s
Staking Tests / test-staking-integration (push) Has been skipped
Staking Tests / test-staking-contract (push) Has been skipped
Staking Tests / run-staking-test-runner (push) Has been skipped
Systemd Sync / sync-systemd (push) Successful in 21s
API Endpoint Tests / test-api-endpoints (push) Failing after 12m19s
ci: standardize pytest invocation and add security scanning
- Changed pytest calls to use `venv/bin/python -m pytest` with explicit config
- Added `--rootdir "$PWD"` and `--import-mode=importlib` for consistent imports
- Fixed PYTHONPATH to use absolute paths with $PWD prefix
- Added smart contract security scanning for Solidity files
- Added Circom circuit security checks for ZK proof circuits
- Added ZK proof implementation security validation
- Added contracts/** to security scanning workflow
2026-05-11 13:46:42 +02:00

475 lines
15 KiB
Python

#!/usr/bin/env python3
"""
Real GPU Miner Client for AITBC - runs on host with actual GPU
Supports both Ollama and vLLM for inference
"""
import json
import time
import httpx
import logging
import sys
import subprocess
import os
from datetime import datetime, timezone
# Configuration
COORDINATOR_URL = "http://127.0.0.1:8011"
MINER_ID = "${MINER_API_KEY}"
AUTH_TOKEN = "${MINER_API_KEY}"
HEARTBEAT_INTERVAL = 15
MAX_RETRIES = 10
RETRY_DELAY = 30
INFERENCE_BACKEND = os.getenv("INFERENCE_BACKEND", "auto") # auto, ollama, vllm
# Setup logging with explicit configuration
LOG_PATH = "/home/oib/windsurf/aitbc/logs/host_gpu_miner.log"
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
class FlushHandler(logging.StreamHandler):
def emit(self, record):
super().emit(record)
self.flush()
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
FlushHandler(sys.stdout),
logging.FileHandler(LOG_PATH)
]
)
logger = logging.getLogger(__name__)
# Force stdout to be unbuffered
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
# GPU capabilities (RTX 4060 Ti)
GPU_CAPABILITIES = {
"gpu": {
"model": "NVIDIA GeForce RTX 4060 Ti",
"memory_gb": 16,
"cuda_version": "12.4",
"platform": "CUDA",
"supported_tasks": ["inference", "training", "stable-diffusion", "llama"],
"max_concurrent_jobs": 1
}
}
def get_gpu_info():
"""Get real GPU information"""
try:
result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu',
'--format=csv,noheader,nounits'],
capture_output=True, text=True, timeout=5)
if result.returncode == 0:
info = result.stdout.strip().split(', ')
return {
"name": info[0],
"memory_total": int(info[1]),
"memory_used": int(info[2]),
"utilization": int(info[3])
}
except Exception as e:
logger.error(f"Failed to get GPU info: {e}")
return None
def check_ollama():
"""Check if Ollama is running and has models"""
try:
response = httpx.get("http://localhost:11434/api/tags", timeout=5)
if response.status_code == 200:
models = response.json().get('models', [])
model_names = [m['name'] for m in models]
logger.info(f"Ollama running with models: {model_names}")
return True, model_names
else:
logger.error("Ollama not responding")
return False, []
except Exception as e:
logger.error(f"Ollama check failed: {e}")
return False, []
def check_vllm():
"""Check if vLLM is available and can load models"""
try:
from vllm import LLM
# Test basic vLLM functionality
logger.info("vLLM is available")
return True, ["vllm"]
except ImportError:
logger.warning("vLLM not installed")
return False, []
except Exception as e:
logger.error(f"vLLM check failed: {e}")
return False, []
def detect_inference_backend():
"""Detect available inference backend"""
if INFERENCE_BACKEND == "ollama":
available, models = check_ollama()
return "ollama", models if available else []
elif INFERENCE_BACKEND == "vllm":
available, models = check_vllm()
return "vllm", models if available else []
else: # auto
# Try vLLM first (more optimized)
vllm_available, vllm_models = check_vllm()
if vllm_available:
return "vllm", vllm_models
# Fall back to Ollama
ollama_available, ollama_models = check_ollama()
if ollama_available:
return "ollama", ollama_models
logger.error("No inference backend available")
return None, []
def wait_for_coordinator():
"""Wait for coordinator to be available"""
for i in range(MAX_RETRIES):
try:
response = httpx.get(f"{COORDINATOR_URL}/v1/health", timeout=5)
if response.status_code == 200:
logger.info("Coordinator is available!")
return True
except:
pass
logger.info(f"Waiting for coordinator... ({i+1}/{MAX_RETRIES})")
time.sleep(RETRY_DELAY)
logger.error("Coordinator not available after max retries")
return False
def register_miner():
"""Register the miner with the coordinator"""
register_data = {
"capabilities": GPU_CAPABILITIES,
"concurrency": 1,
"region": "localhost"
}
headers = {
"X-Api-Key": AUTH_TOKEN,
"Content-Type": "application/json"
}
try:
response = httpx.post(
f"{COORDINATOR_URL}/v1/miners/register?miner_id={MINER_ID}",
json=register_data,
headers=headers,
timeout=10
)
if response.status_code == 200:
data = response.json()
logger.info(f"Successfully registered miner: {data}")
return data.get("session_token", "demo-token")
else:
logger.error(f"Registration failed: {response.status_code} - {response.text}")
return None
except Exception as e:
logger.error(f"Registration error: {e}")
return None
def send_heartbeat():
"""Send heartbeat to coordinator with real GPU stats"""
gpu_info = get_gpu_info()
if gpu_info:
heartbeat_data = {
"status": "active",
"current_jobs": 0,
"last_seen": datetime.now(timezone.utc).isoformat(),
"gpu_utilization": gpu_info["utilization"],
"memory_used": gpu_info["memory_used"],
"memory_total": gpu_info["memory_total"]
}
else:
heartbeat_data = {
"status": "active",
"current_jobs": 0,
"last_seen": datetime.now(timezone.utc).isoformat(),
"gpu_utilization": 0,
"memory_used": 0,
}
headers = {
"X-Api-Key": AUTH_TOKEN,
"Content-Type": "application/json"
}
try:
response = httpx.post(
f"{COORDINATOR_URL}/v1/miners/heartbeat?miner_id={MINER_ID}",
json=heartbeat_data,
headers=headers,
timeout=5
)
if response.status_code == 200:
logger.info(f"Heartbeat sent (GPU: {gpu_info['utilization'] if gpu_info else 'N/A'}%)")
else:
logger.error(f"Heartbeat failed: {response.status_code} - {response.text}")
except Exception as e:
logger.error(f"Heartbeat error: {e}")
def execute_job_with_ollama(job_id, prompt, model):
"""Execute job using Ollama"""
logger.info(f"Running inference with Ollama model: {model}")
start_time = time.time()
ollama_response = httpx.post(
"http://localhost:11434/api/generate",
json={
"model": model,
"prompt": prompt,
"stream": False
},
timeout=60
)
if ollama_response.status_code == 200:
result = ollama_response.json()
output = result.get('response', '')
execution_time = time.time() - start_time
return {
"output": output,
"model": model,
"tokens_processed": result.get('eval_count', 0),
"execution_time": execution_time
}
else:
raise Exception(f"Ollama error: {ollama_response.status_code}")
def execute_job_with_vllm(job_id, prompt, model):
"""Execute job using vLLM"""
logger.info(f"Running inference with vLLM model: {model}")
start_time = time.time()
try:
from vllm import LLM
# Initialize vLLM with the model
# Note: vLLM uses HuggingFace model names
llm = LLM(model=model, trust_remote_code=True)
# Generate response
outputs = llm.generate([prompt])
output = outputs[0].outputs[0].text
execution_time = time.time() - start_time
return {
"output": output,
"model": model,
"tokens_processed": len(outputs[0].outputs[0].token_ids),
"execution_time": execution_time
}
except Exception as e:
raise Exception(f"vLLM error: {e}")
def execute_job(job, available_models, backend):
"""Execute a job using real GPU resources"""
job_id = job.get('job_id')
payload = job.get('payload', {})
logger.info(f"Executing job {job_id} with backend: {backend}")
try:
if payload.get('type') == 'inference':
# Get the prompt and model
prompt = payload.get('prompt', '')
model = payload.get('model', 'llama3.2:latest')
# Convert Ollama model name to vLLM format if needed
if backend == "vllm":
# vLLM uses HuggingFace model names
# Map common Ollama models to vLLM equivalents
model_mapping = {
"llama2:7b": "meta-llama/Llama-2-7b-hf",
"llama3.2:latest": "meta-llama/Llama-3.2-3B-Instruct",
"llama3.2:3b": "meta-llama/Llama-3.2-3B-Instruct",
"qwen3:8b": "Qwen/Qwen2.5-7B-Instruct",
}
model = model_mapping.get(model, model)
# Execute with appropriate backend
if backend == "ollama":
result = execute_job_with_ollama(job_id, prompt, model)
elif backend == "vllm":
result = execute_job_with_vllm(job_id, prompt, model)
else:
raise Exception(f"Unknown backend: {backend}")
# Get GPU stats after execution
gpu_after = get_gpu_info()
# Submit result back to coordinator
submit_result(job_id, {
"result": {
"status": "completed",
"output": result["output"],
"model": result["model"],
"tokens_processed": result["tokens_processed"],
"execution_time": result["execution_time"],
"gpu_used": True,
"backend": backend
},
"metrics": {
"gpu_utilization": gpu_after["utilization"] if gpu_after else 0,
"memory_used": gpu_after["memory_used"] if gpu_after else 0,
"memory_peak": max(gpu_after["memory_used"] if gpu_after else 0, 2048)
}
})
logger.info(f"Job {job_id} completed in {result['execution_time']:.2f}s")
return True
else:
# Unsupported job type
logger.error(f"Unsupported job type: {payload.get('type')}")
submit_result(job_id, {
"result": {
"status": "failed",
"error": f"Unsupported job type: {payload.get('type')}"
}
})
return False
except Exception as e:
logger.error(f"Job execution error: {e}")
submit_result(job_id, {
"result": {
"status": "failed",
"error": str(e)
}
})
return False
def submit_result(job_id, result):
"""Submit job result to coordinator"""
headers = {
"X-Api-Key": AUTH_TOKEN,
"Content-Type": "application/json"
}
try:
response = httpx.post(
f"{COORDINATOR_URL}/v1/miners/{job_id}/result",
json=result,
headers=headers,
timeout=10
)
if response.status_code == 200:
logger.info(f"Result submitted for job {job_id}")
else:
logger.error(f"Result submission failed: {response.status_code} - {response.text}")
except Exception as e:
logger.error(f"Result submission error: {e}")
def poll_for_jobs():
"""Poll for available jobs"""
poll_data = {
"max_wait_seconds": 5
}
headers = {
"X-Api-Key": AUTH_TOKEN,
"Content-Type": "application/json"
}
try:
response = httpx.post(
f"{COORDINATOR_URL}/v1/miners/poll",
json=poll_data,
headers=headers,
timeout=10
)
if response.status_code == 200:
job = response.json()
logger.info(f"Received job: {job}")
return job
elif response.status_code == 204:
return None
else:
logger.error(f"Poll failed: {response.status_code} - {response.text}")
return None
except Exception as e:
logger.error(f"Error polling for jobs: {e}")
return None
def main():
"""Main miner loop"""
logger.info("Starting Real GPU Miner Client on Host...")
# Check GPU availability
gpu_info = get_gpu_info()
if not gpu_info:
logger.error("GPU not available, exiting")
sys.exit(1)
logger.info(f"GPU detected: {gpu_info['name']} ({gpu_info['memory_total']}MB)")
# Detect inference backend
backend, models = detect_inference_backend()
if not backend:
logger.error("No inference backend available - please install Ollama or vLLM")
sys.exit(1)
logger.info(f"Using inference backend: {backend}")
logger.info(f"Available models: {', '.join(models)}")
# Wait for coordinator
if not wait_for_coordinator():
sys.exit(1)
# Register with coordinator
session_token = register_miner()
if not session_token:
logger.error("Failed to register, exiting")
sys.exit(1)
logger.info("Miner registered successfully, starting main loop...")
# Main loop
last_heartbeat = 0
last_poll = 0
try:
while True:
current_time = time.time()
# Send heartbeat
if current_time - last_heartbeat >= HEARTBEAT_INTERVAL:
send_heartbeat()
last_heartbeat = current_time
# Poll for jobs
if current_time - last_poll >= 3:
job = poll_for_jobs()
if job:
# Execute the job with real GPU
execute_job(job, models, backend)
last_poll = current_time
time.sleep(1)
except KeyboardInterrupt:
logger.info("Shutting down miner...")
except Exception as e:
logger.error(f"Error in main loop: {e}")
sys.exit(1)
if __name__ == "__main__":
main()