chore(systemd): remove obsolete systemd service files and update infrastructure documentation
- Remove 8 unused systemd service files from coordinator-api/systemd/ - aitbc-adaptive-learning.service (port 8005) - aitbc-advanced-ai.service - aitbc-enterprise-api.service - aitbc-gpu-multimodal.service (port 8003) - aitbc-marketplace-enhanced.service (port 8006) - aitbc-modality-optimization.service (port 8004) - aitbc-multimodal.service (port 8002) - aitbc-openclaw-enhanced.service (port 8007)
This commit is contained in:
89
dev/gpu/deploy_gpu_all_in_one.sh
Normal file
89
dev/gpu/deploy_gpu_all_in_one.sh
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/bin/bash
# Deploy GPU Miner to AITBC Container - All in One
#
# Copies the GPU registry and miner scripts to the remote host over ssh,
# creates a Python virtualenv there, installs both programs as systemd
# services and finally queries the registry for registered GPUs.
#
# Optional environment overrides (defaults reproduce the original setup):
#   AITBC_SSH_HOST      ssh host alias of the target      (default: aitbc)
#   AITBC_SRC_DIR       local directory holding the .py   (default: /home/oib/windsurf/aitbc)
#   AITBC_REMOTE_USER   user the services run as          (default: oib)
#   AITBC_REMOTE_HOME   that user's home on the target    (default: /home/$AITBC_REMOTE_USER)
#   AITBC_REGISTRY_URL  base URL of the GPU registry      (default: http://10.1.223.93:8091)

set -euo pipefail

readonly SSH_HOST="${AITBC_SSH_HOST:-aitbc}"
readonly SRC_DIR="${AITBC_SRC_DIR:-/home/oib/windsurf/aitbc}"
readonly REMOTE_USER="${AITBC_REMOTE_USER:-oib}"
readonly REMOTE_HOME="${AITBC_REMOTE_HOME:-/home/${REMOTE_USER}}"
readonly VENV_DIR="${REMOTE_HOME}/.venv-gpu"
readonly REGISTRY_URL="${AITBC_REGISTRY_URL:-http://10.1.223.93:8091}"

# Print a warning to stderr without aborting the deployment.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Run one command line ($1) on the remote host.
remote() {
  ssh "${SSH_HOST}" "$1"
}

# Copy one script ($1, relative to SRC_DIR) into the remote home directory.
copy_script() {
  # NOTE(review): StrictHostKeyChecking=no skips host key verification; it is
  # kept from the original script - confirm this is intended for this target.
  scp -o StrictHostKeyChecking=no "${SRC_DIR}/$1" "${SSH_HOST}:${REMOTE_HOME}/"
}

# Emit a systemd unit on stdout.
# Arguments: $1 description, $2 After= value, $3 Wants= value (may be empty),
#            $4 script file name inside REMOTE_HOME, $5 RestartSec value.
render_unit() {
  local description=$1 after=$2 wants=$3 script=$4 restart_sec=$5

  printf '[Unit]\nDescription=%s\nAfter=%s\n' "${description}" "${after}"
  if [[ -n "${wants}" ]]; then
    printf 'Wants=%s\n' "${wants}"
  fi
  cat <<EOF

[Service]
Type=simple
User=${REMOTE_USER}
WorkingDirectory=${REMOTE_HOME}
ExecStart=${VENV_DIR}/bin/python ${REMOTE_HOME}/${script}
Restart=always
RestartSec=${restart_sec}
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
EOF
}

# Write the unit text read from stdin to /etc/systemd/system/$1 on the remote
# host, then reload systemd and enable + start the unit.
install_and_start_unit() {
  local unit=$1

  ssh "${SSH_HOST}" "sudo tee '/etc/systemd/system/${unit}' >/dev/null"
  remote 'sudo systemctl daemon-reload'
  remote "sudo systemctl enable --now '${unit}'"
}

# Show the status of unit $1. `systemctl status` exits non-zero for inactive
# or failed units, which must not abort the remaining checks under `set -e`.
show_status() {
  local unit=$1

  remote "sudo systemctl status '${unit}' --no-pager" \
    || warn "${unit} is not active (see output above)"
}

main() {
  echo "🚀 Deploying GPU Miner to AITBC Container..."

  # Step 1: Copy files
  echo "1. Copying GPU scripts..."
  copy_script gpu_registry_demo.py
  copy_script gpu_miner_with_wait.py

  # Step 2: Install Python and deps
  echo "2. Installing Python and dependencies..."
  remote 'sudo apt-get update -qq'
  remote 'sudo apt-get install -y -qq python3 python3-venv python3-pip'
  remote "python3 -m venv '${VENV_DIR}'"
  remote "'${VENV_DIR}/bin/pip' install -q fastapi uvicorn httpx psutil"

  # Steps 3+4: Create and start GPU registry service
  echo "3. Creating GPU registry service..."
  echo "4. Starting GPU registry..."
  render_unit "AITBC GPU Registry" "network.target" "" \
    gpu_registry_demo.py 5 \
    | install_and_start_unit aitbc-gpu-registry.service

  # Steps 5+6: Create and start GPU miner service (ordered after the registry)
  echo "5. Creating GPU miner service..."
  echo "6. Starting GPU miner..."
  render_unit "AITBC GPU Miner Client" \
    "network.target aitbc-gpu-registry.service" \
    "aitbc-gpu-registry.service" \
    gpu_miner_with_wait.py 10 \
    | install_and_start_unit aitbc-gpu-miner.service

  # Step 7: Check services
  echo "7. Checking services..."
  printf '\n=== GPU Registry Service ===\n'
  show_status aitbc-gpu-registry.service
  printf '\n=== GPU Miner Service ===\n'
  show_status aitbc-gpu-miner.service

  # Step 8: Verify GPU registration
  printf '\n8. Verifying GPU registration...\n'
  sleep 3 # give the miner a moment to register before querying
  echo " curl ${REGISTRY_URL}/miners/list"
  if ! curl -fsS --max-time 10 "${REGISTRY_URL}/miners/list" \
    | python3 -c "import sys,json; data=json.load(sys.stdin); print(f'✅ Found {len(data.get(\"gpus\", []))} GPU(s)'); [print(f' - {gpu[\"capabilities\"][\"gpu\"][\"model\"]} ({gpu[\"capabilities\"][\"gpu\"][\"memory_gb\"]}GB)') for gpu in data.get('gpus', [])]"; then
    warn "could not verify GPU registration at ${REGISTRY_URL}/miners/list"
  fi

  printf '\n✅ Deployment complete!\n'
  echo "GPU Registry: ${REGISTRY_URL}"
  echo "GPU Miner: started (see service status above)"
}

main "$@"
|
||||
89
dev/gpu/deploy_gpu_container.sh
Normal file
89
dev/gpu/deploy_gpu_container.sh
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/bin/bash
# Deploy GPU Miner to AITBC Container
#
# Pushes the GPU miner and registry scripts into an Incus container, installs
# their Python dependencies and registers both as systemd services inside the
# container.
#
# Optional environment overrides (defaults reproduce the original setup):
#   AITBC_CONTAINER     Incus instance name            (default: aitbc)
#   AITBC_SRC_DIR       local directory with the .py   (default: /home/oib/windsurf/aitbc)
#   AITBC_CONTAINER_IP  address shown in the summary   (default: 10.1.223.93)

set -euo pipefail

readonly CONTAINER="${AITBC_CONTAINER:-aitbc}"
readonly SRC_DIR="${AITBC_SRC_DIR:-/home/oib/windsurf/aitbc}"
readonly CONTAINER_IP="${AITBC_CONTAINER_IP:-10.1.223.93}"

warn() {
  printf 'warning: %s\n' "$*" >&2
}

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Run a command inside the container. `incus exec` already runs it as root,
# so no nested sudo is needed inside the container.
in_container() {
  sudo incus exec "${CONTAINER}" -- "$@"
}

# Private scratch directory for generated unit files, removed on any exit.
tmpdir=$(mktemp -d) || die "mktemp failed"
cleanup() {
  rm -rf -- "${tmpdir:?}"
}
trap cleanup EXIT

# Generate a systemd unit, push it into the container, then enable and start it.
# Arguments: $1 unit file name, $2 description, $3 script name (resolved
#            relative to WorkingDirectory=/home/oib), $4 RestartSec value.
install_service() {
  local unit=$1 description=$2 script=$3 restart_sec=$4
  local unit_file="${tmpdir}/${unit}"

  cat >"${unit_file}" <<EOF
[Unit]
Description=${description}
After=network.target

[Service]
Type=simple
User=oib
WorkingDirectory=/home/oib
ExecStart=/usr/bin/python3 ${script}
Restart=always
RestartSec=${restart_sec}
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
EOF

  # Explicit owner/mode: the mktemp-created source file would otherwise pass
  # on its own restrictive permissions.
  sudo incus file push --uid 0 --gid 0 --mode 0644 \
    "${unit_file}" "${CONTAINER}/etc/systemd/system/${unit}"
  in_container systemctl daemon-reload
  in_container systemctl enable "${unit}"
  in_container systemctl start "${unit}"
}

# Show the status of unit $1; a non-zero status (inactive/failed unit) is
# reported but does not abort the script.
show_status() {
  local unit=$1

  in_container systemctl status "${unit}" --no-pager \
    || warn "${unit} is not active (see output above)"
}

main() {
  echo "🚀 Deploying GPU Miner to AITBC Container..."

  # Check if container is accessible
  echo "1. Checking container access..."
  in_container whoami || die "container '${CONTAINER}' is not accessible"

  # Copy GPU miner files
  echo "2. Copying GPU miner files..."
  sudo incus file push "${SRC_DIR}/gpu_miner_with_wait.py" "${CONTAINER}/home/oib/"
  sudo incus file push "${SRC_DIR}/gpu_registry_demo.py" "${CONTAINER}/home/oib/"

  # Install dependencies
  # NOTE(review): system-wide pip install as root inside the container; this
  # may be rejected on distributions that mark Python as externally managed.
  echo "3. Installing dependencies..."
  in_container pip install httpx fastapi uvicorn psutil

  # Create GPU miner service
  echo "4. Creating GPU miner service..."
  install_service gpu-miner.service "AITBC GPU Miner Client" \
    gpu_miner_with_wait.py 30

  # Create GPU registry service
  echo "5. Creating GPU registry service..."
  install_service gpu-registry.service "AITBC GPU Registry" \
    gpu_registry_demo.py 10

  # Check services
  echo "6. Checking services..."
  echo "GPU Miner Service:"
  show_status gpu-miner.service
  printf '\nGPU Registry Service:\n'
  show_status gpu-registry.service

  # Show access URLs
  printf '\n✅ Deployment complete!\n'
  echo "Access URLs:"
  echo " - Container IP: ${CONTAINER_IP}"
  echo " - GPU Registry: http://${CONTAINER_IP}:8091/miners/list"
  echo " - Coordinator API: http://${CONTAINER_IP}:8000"

  printf '\nTo check GPU status:\n'
  echo " curl http://${CONTAINER_IP}:8091/miners/list"
}

main "$@"
|
||||
92
dev/gpu/gpu_exchange_status.py
Normal file
92
dev/gpu/gpu_exchange_status.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GPU Exchange Integration Demo
|
||||
Shows how the GPU miner is integrated with the exchange
|
||||
"""
|
||||
|
||||
import json
|
||||
import httpx
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
print("🔗 AITBC GPU Exchange Integration")
print("=" * 50)

# Outcome of each probe below. The summary in section 6 is derived from these
# flags instead of printing hard-coded check marks.
registry_ok = False
gpu_count = 0
exchange_ok = False
blockchain_ok = False


def _mark(ok):
    """Return the status glyph used in the summary lines."""
    return "✅" if ok else "❌"


# Check GPU Registry
print("\n1. 📊 Checking GPU Registry...")
try:
    response = httpx.get("http://localhost:8091/miners/list", timeout=5)
    if response.status_code == 200:
        data = response.json()
        gpus = data.get("gpus", [])
        registry_ok = True
        gpu_count = len(gpus)
        print(f" Found {len(gpus)} registered GPU(s)")

        for gpu in gpus:
            # Tolerate records that lack some capability fields rather than
            # reporting the whole registry as failed.
            gpu_caps = (gpu.get("capabilities") or {}).get("gpu") or {}
            print(f"\n 🎮 GPU Details:")
            print(f" Model: {gpu_caps.get('model', 'Unknown')}")
            print(f" Memory: {gpu_caps.get('memory_gb', 'Unknown')} GB")
            print(f" CUDA: {gpu_caps.get('cuda_version', 'Unknown')}")
            print(f" Status: {gpu.get('status', 'Unknown')}")
            print(f" Region: {gpu.get('region', 'Unknown')}")
    else:
        print(" ❌ GPU Registry not accessible")
except Exception as e:
    print(f" ❌ Error: {e}")

# Check Exchange
print("\n2. 💰 Checking Trade Exchange...")
try:
    response = httpx.get("http://localhost:3002", timeout=5)
    if response.status_code == 200:
        exchange_ok = True
        print(" ✅ Trade Exchange is running")
        print(" 🌐 URL: http://localhost:3002")
    else:
        print(" ❌ Trade Exchange not responding")
except Exception:
    print(" ❌ Trade Exchange not accessible")

# Check Blockchain
print("\n3. ⛓️ Checking Blockchain Node...")
try:
    response = httpx.get("http://localhost:9080/rpc/head", timeout=5)
    if response.status_code == 200:
        data = response.json()
        blockchain_ok = True
        print(f" ✅ Blockchain Node active")
        print(f" Block Height: {data.get('height', 'Unknown')}")
        # `hash` may be present but null; str() keeps the slice from raising.
        print(f" Block Hash: {str(data.get('hash') or 'Unknown')[:16]}...")
    else:
        print(" ❌ Blockchain Node not responding")
except Exception:
    print(" ❌ Blockchain Node not accessible")

# Show Integration Points
print("\n4. 🔌 Integration Points:")
print(" • GPU Registry: http://localhost:8091/miners/list")
print(" • Trade Exchange: http://localhost:3002")
print(" • Blockchain RPC: http://localhost:9080")
print(" • GPU Marketplace: Exchange > Browse GPU Marketplace")

# Show API Usage
print("\n5. 📡 API Usage Examples:")
print("\n Get registered GPUs:")
print(" curl http://localhost:8091/miners/list")
print("\n Get GPU details:")
print(" curl http://localhost:8091/miners/localhost-gpu-miner")
print("\n Get blockchain info:")
print(" curl http://localhost:9080/rpc/head")

# Show Current Status (derived from the probes above)
print("\n6. 📈 Current System Status:")
# The miner process itself is not probed here; a registered GPU is used as
# the evidence that a miner has reached the registry.
print(f" {_mark(gpu_count > 0)} GPU Miner: {gpu_count} GPU(s) registered")
print(f" {_mark(registry_ok)} GPU Registry: port 8091")
print(f" {_mark(exchange_ok)} Trade Exchange: port 3002")
print(f" {_mark(blockchain_ok)} Blockchain Node: port 9080")

print("\n" + "=" * 50)
if registry_ok and exchange_ok and blockchain_ok and gpu_count > 0:
    print("🎯 GPU is successfully integrated with the exchange!")
else:
    print("⚠️ Integration incomplete - see the failed checks above")
print("\nNext steps:")
print("1. Open http://localhost:3002 in your browser")
print("2. Click 'Browse GPU Marketplace'")
print("3. View the registered RTX 4060 Ti GPU")
print("4. Purchase GPU compute time with AITBC tokens")
|
||||
467
dev/gpu/gpu_miner_host.py
Normal file
467
dev/gpu/gpu_miner_host.py
Normal file
@@ -0,0 +1,467 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Real GPU Miner Client for AITBC - runs on host with actual GPU
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
import httpx
|
||||
import logging
|
||||
import sys
|
||||
import subprocess
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional
|
||||
|
||||
# Configuration
# Base URL of the coordinator API the miner registers with and polls.
COORDINATOR_URL = os.environ.get("COORDINATOR_URL", "http://127.0.0.1:9080")
# API key sent in the X-Api-Key header of every coordinator request.
AUTH_TOKEN = os.environ.get("MINER_API_KEY", "miner_test")
# Miner identity sent as the `miner_id` query parameter. It used to be read
# from MINER_API_KEY as well, which put the API key into request URLs; set
# MINER_ID to avoid that. The old value remains the fallback so existing
# deployments keep their registered identity.
MINER_ID = os.environ.get("MINER_ID") or AUTH_TOKEN
HEARTBEAT_INTERVAL = 15  # seconds between heartbeats
MAX_RETRIES = 10         # coordinator health-check attempts at startup
RETRY_DELAY = 30         # seconds to wait between those attempts
|
||||
|
||||
# Setup logging with explicit configuration
# The log file location can be overridden with GPU_MINER_LOG_PATH; the default
# is the original hard-coded path.
LOG_PATH = os.environ.get(
    "GPU_MINER_LOG_PATH",
    "/home/oib/windsurf/aitbc/logs/host_gpu_miner.log",
)
_log_dir = os.path.dirname(LOG_PATH)
if _log_dir:  # a bare file name has no directory component to create
    os.makedirs(_log_dir, exist_ok=True)


class FlushHandler(logging.StreamHandler):
    """Stream handler that flushes the stream after every emitted record."""

    def emit(self, record):
        super().emit(record)
        self.flush()


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        FlushHandler(sys.stdout),
        logging.FileHandler(LOG_PATH)
    ]
)
logger = logging.getLogger(__name__)

# Switch stdout/stderr to line buffering so output appears promptly when the
# process runs under a supervisor. Replacement streams (e.g. capture objects)
# may not offer reconfigure(), so only call it where it exists.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(line_buffering=True)
|
||||
|
||||
# Model-number substring -> architecture family label.
ARCH_MAP = {
    "4090": "ada_lovelace",
    "4080": "ada_lovelace",
    "4070": "ada_lovelace",
    "4060": "ada_lovelace",
    "3090": "ampere",
    "3080": "ampere",
    "3070": "ampere",
    "3060": "ampere",
    "2080": "turing",
    "2070": "turing",
    "2060": "turing",
    "1080": "pascal",
    "1070": "pascal",
    "1060": "pascal",
}

# Model substrings reported as the "datacenter" family.
_DATACENTER_MARKERS = ("A100", "V100", "P100", "H100")


def classify_architecture(name: str) -> str:
    """Map a GPU product name to an architecture family label.

    Matching is a case-insensitive substring search: ARCH_MAP keys are tried
    first (in definition order), then the datacenter markers.

    Args:
        name: GPU name, e.g. "NVIDIA GeForce RTX 4060 Ti". Empty or None is
            accepted and classified as "unknown".

    Returns:
        One of the ARCH_MAP values, "datacenter", or "unknown".
    """
    if not name:
        return "unknown"

    upper = name.upper()
    for key, arch in ARCH_MAP.items():
        if key in upper:
            return arch
    if any(marker in upper for marker in _DATACENTER_MARKERS):
        return "datacenter"
    return "unknown"
|
||||
|
||||
|
||||
def detect_cuda_version() -> Optional[str]:
    """Detect the CUDA version reported by the NVIDIA driver.

    The value is read from the "CUDA Version: X.Y" field of the plain
    `nvidia-smi` banner. If that field cannot be found, the driver version of
    the first GPU is returned instead, which is what this function returned
    before it parsed the banner.

    Returns:
        The version string, or None when nvidia-smi is unavailable or fails.
    """
    import re  # local import: only this function needs it

    try:
        banner = subprocess.run(["nvidia-smi"],
                                capture_output=True, text=True, timeout=5)
        if banner.returncode == 0:
            match = re.search(r"CUDA Version:\s*([0-9]+(?:\.[0-9]+)*)", banner.stdout)
            if match:
                return match.group(1)

        # Fallback: driver version. With several GPUs the query prints one
        # line per device, so only the first line is used.
        result = subprocess.run(["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"],
                                capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            lines = result.stdout.strip().splitlines()
            if lines:
                return lines[0].strip()
    except Exception as e:
        logger.error(f"Failed to detect CUDA/driver version: {e}")
    return None
|
||||
|
||||
|
||||
def build_gpu_capabilities() -> Dict:
    """Build the capabilities document sent when registering this miner.

    Returns:
        A dict with a single "gpu" entry describing model, architecture,
        memory, CUDA version and supported tasks. When no GPU information is
        available the model is "Unknown GPU" and the memory is 0.
    """
    gpu_info = get_gpu_info()
    cuda_version = detect_cuda_version() or "unknown"
    model = gpu_info["name"] if gpu_info else "Unknown GPU"
    # get_gpu_info() reports memory as nvidia-smi prints it with `nounits`,
    # i.e. in MiB (main() logs the same value with an "MB" suffix), so it has
    # to be converted before being published under the key `memory_gb`.
    memory_total_mib = gpu_info["memory_total"] if gpu_info else 0
    memory_gb = round(memory_total_mib / 1024)
    arch = classify_architecture(model)
    edge_optimized = arch in {"ada_lovelace", "ampere", "turing"}

    return {
        "gpu": {
            "model": model,
            "architecture": arch,
            # Everything except the datacenter family is reported as consumer
            # grade (including "unknown", as before).
            "consumer_grade": arch != "datacenter",
            "edge_optimized": edge_optimized,
            "memory_gb": memory_gb,
            "cuda_version": cuda_version,
            "platform": "CUDA",
            "supported_tasks": ["inference", "training", "stable-diffusion", "llama"],
            "max_concurrent_jobs": 1
        }
    }
|
||||
|
||||
|
||||
def measure_coordinator_latency() -> float:
    """Measure the round-trip time of a coordinator health check.

    Returns:
        Elapsed time in milliseconds when the health endpoint answers with
        HTTP 200, otherwise -1.0 (request failed or non-200 status).
    """
    # perf_counter is monotonic, so a wall-clock adjustment during the request
    # cannot produce a negative or inflated latency.
    start = time.perf_counter()
    try:
        resp = httpx.get(f"{COORDINATOR_URL}/v1/health", timeout=3)
        if resp.status_code == 200:
            return (time.perf_counter() - start) * 1000
    except Exception:
        # Best effort: an unreachable coordinator is reported as -1.0 below.
        pass
    return -1.0
|
||||
|
||||
|
||||
def get_gpu_info():
    """Get real GPU information

    Queries nvidia-smi for name, total/used memory and utilization. When the
    machine has several GPUs only the first reported device is used.

    Returns:
        A dict with keys "name", "memory_total", "memory_used" (integers as
        printed by nvidia-smi with `nounits`) and "utilization", or None when
        nvidia-smi is missing, fails, or prints something unparsable.
    """
    try:
        result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu',
                                 '--format=csv,noheader,nounits'],
                                capture_output=True, text=True, timeout=5)
        if result.returncode != 0:
            logger.error(f"nvidia-smi exited with status {result.returncode}: {result.stderr.strip()}")
            return None

        # One CSV line is printed per GPU; splitting the whole output at once
        # would merge fields of different devices.
        rows = [row for row in result.stdout.splitlines() if row.strip()]
        if not rows:
            logger.error("nvidia-smi returned no GPU rows")
            return None

        # Split from the right so the three numeric columns are isolated even
        # if the product name itself contains a comma.
        name, memory_total, memory_used, utilization = (
            field.strip() for field in rows[0].rsplit(',', 3)
        )
        return {
            "name": name,
            "memory_total": int(memory_total),
            "memory_used": int(memory_used),
            "utilization": int(utilization)
        }
    except Exception as e:
        logger.error(f"Failed to get GPU info: {e}")
    return None
|
||||
|
||||
def check_ollama():
    """Probe the local Ollama server and list its models.

    Returns:
        (True, [model names]) when the tags endpoint answers with HTTP 200,
        otherwise (False, []). Request and parsing errors are logged and also
        yield (False, []).
    """
    try:
        tags_reply = httpx.get("http://localhost:11434/api/tags", timeout=5)
        if tags_reply.status_code != 200:
            logger.error("Ollama not responding")
            return False, []

        model_names = []
        for entry in tags_reply.json().get('models', []):
            model_names.append(entry['name'])
        logger.info(f"Ollama running with models: {model_names}")
        return True, model_names
    except Exception as exc:
        logger.error(f"Ollama check failed: {exc}")
        return False, []
|
||||
|
||||
def wait_for_coordinator():
    """Wait for coordinator to be available

    Polls the coordinator health endpoint up to MAX_RETRIES times, sleeping
    RETRY_DELAY seconds between attempts.

    Returns:
        True as soon as the endpoint answers with HTTP 200, False when all
        attempts failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = httpx.get(f"{COORDINATOR_URL}/v1/health", timeout=5)
            if response.status_code == 200:
                logger.info("Coordinator is available!")
                return True
            logger.debug(f"Coordinator health check returned {response.status_code}")
        except Exception as e:
            # `Exception` rather than a bare except, so Ctrl-C / SystemExit
            # still interrupt the wait.
            logger.debug(f"Coordinator health check failed: {e}")

        logger.info(f"Waiting for coordinator... ({attempt}/{MAX_RETRIES})")
        if attempt < MAX_RETRIES:
            # No point in sleeping after the final attempt.
            time.sleep(RETRY_DELAY)

    logger.error("Coordinator not available after max retries")
    return False
|
||||
|
||||
def register_miner():
    """Register the miner with the coordinator

    Returns:
        The "session_token" from the coordinator's reply ("demo-token" when
        the reply has none), or None when registration fails.
    """
    register_data = {
        "capabilities": build_gpu_capabilities(),
        "concurrency": 1,
        "region": "localhost"
    }

    headers = {
        "X-Api-Key": AUTH_TOKEN,
        "Content-Type": "application/json"
    }

    try:
        response = httpx.post(
            f"{COORDINATOR_URL}/v1/miners/register",
            # Passed via `params` so the id is URL-encoded instead of being
            # pasted into the query string verbatim.
            params={"miner_id": MINER_ID},
            json=register_data,
            headers=headers,
            timeout=10
        )

        if response.status_code == 200:
            data = response.json()
            logger.info(f"Successfully registered miner: {data}")
            return data.get("session_token", "demo-token")
        else:
            logger.error(f"Registration failed: {response.status_code} - {response.text}")
            return None

    except Exception as e:
        logger.error(f"Registration error: {e}")
        return None
|
||||
|
||||
def send_heartbeat():
    """Send heartbeat to coordinator with real GPU stats

    When GPU information is unavailable the utilization and memory figures
    are sent as 0 and the architecture as "unknown". Failures are logged and
    never raised.
    """
    gpu_info = get_gpu_info()
    arch = classify_architecture(gpu_info["name"]) if gpu_info else "unknown"
    latency_ms = measure_coordinator_latency()

    # Single payload definition; the GPU-less case only differs in the values.
    heartbeat_data = {
        "status": "active",
        "current_jobs": 0,
        "last_seen": datetime.utcnow().isoformat(),
        "gpu_utilization": gpu_info["utilization"] if gpu_info else 0,
        "memory_used": gpu_info["memory_used"] if gpu_info else 0,
        "memory_total": gpu_info["memory_total"] if gpu_info else 0,
        "architecture": arch,
        "edge_optimized": arch in {"ada_lovelace", "ampere", "turing"},
        "network_latency_ms": latency_ms,
    }

    headers = {
        "X-Api-Key": AUTH_TOKEN,
        "Content-Type": "application/json"
    }

    try:
        response = httpx.post(
            f"{COORDINATOR_URL}/v1/miners/heartbeat",
            # `params` URL-encodes the miner id.
            params={"miner_id": MINER_ID},
            json=heartbeat_data,
            headers=headers,
            timeout=5
        )

        if response.status_code == 200:
            logger.info(f"Heartbeat sent (GPU: {gpu_info['utilization'] if gpu_info else 'N/A'}%)")
        else:
            logger.error(f"Heartbeat failed: {response.status_code} - {response.text}")

    except Exception as e:
        logger.error(f"Heartbeat error: {e}")
|
||||
|
||||
def _report_job_failure(job_id, message):
    """Submit a "failed" result carrying `message` for `job_id`."""
    submit_result(job_id, {
        "result": {
            "status": "failed",
            "error": message
        }
    })


def execute_job(job, available_models):
    """Execute a job using real GPU resources

    Only payloads of type "inference" are supported; they are forwarded to
    the local Ollama generate endpoint and the outcome is submitted to the
    coordinator via submit_result().

    Args:
        job: Job dict as returned by poll_for_jobs(); reads "job_id" and
            "payload".
        available_models: Ollama model names. If the requested model is not
            among them, the first available one is used.

    Returns:
        True when the job completed and a result was submitted, otherwise
        False.
    """
    job_id = job.get('job_id')
    # A job whose payload is present but null must not crash the miner loop.
    payload = job.get('payload') or {}

    logger.info(f"Executing job {job_id}: {payload}")

    if not job_id:
        # Without an id there is no result URL to report to.
        logger.error("Job has no job_id; cannot report a result, skipping")
        return False

    try:
        job_type = payload.get('type')
        if job_type != 'inference':
            # Unsupported job type
            logger.error(f"Unsupported job type: {job_type}")
            _report_job_failure(job_id, f"Unsupported job type: {job_type}")
            return False

        # Get the prompt and model
        prompt = payload.get('prompt', '')
        model = payload.get('model', 'llama3.2:latest')

        # Fall back to the first available model if the requested one is missing
        if model not in available_models:
            if not available_models:
                raise Exception("No models available in Ollama")
            model = available_models[0]
            logger.info(f"Using available model: {model}")

        # Call Ollama API for real GPU inference
        logger.info(f"Running inference on GPU with model: {model}")
        start_time = time.time()

        ollama_response = httpx.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False
            },
            timeout=60
        )

        if ollama_response.status_code != 200:
            logger.error(f"Ollama error: {ollama_response.status_code}")
            _report_job_failure(job_id, f"Ollama error: {ollama_response.text}")
            return False

        result = ollama_response.json()
        output = result.get('response', '')
        execution_time = time.time() - start_time

        # Get GPU stats after execution
        gpu_after = get_gpu_info()
        memory_used = gpu_after["memory_used"] if gpu_after else 0

        # Submit result back to coordinator
        submit_result(job_id, {
            "result": {
                "status": "completed",
                "output": output,
                "model": model,
                "tokens_processed": result.get('eval_count', 0),
                "execution_time": execution_time,
                "gpu_used": True
            },
            "metrics": {
                "gpu_utilization": gpu_after["utilization"] if gpu_after else 0,
                "memory_used": memory_used,
                # NOTE(review): the 2048 floor is kept from the original; it is
                # not a measured peak - confirm what the coordinator expects.
                "memory_peak": max(memory_used, 2048)
            }
        })

        logger.info(f"Job {job_id} completed in {execution_time:.2f}s")
        return True

    except Exception as e:
        logger.error(f"Job execution error: {e}")
        _report_job_failure(job_id, str(e))
        return False
|
||||
|
||||
def submit_result(job_id, result):
    """Post a job result document to the coordinator.

    Args:
        job_id: Identifier placed in the result URL.
        result: JSON-serialisable result body.

    Nothing is returned; the outcome is only logged, and request errors are
    caught and logged as well.
    """
    result_url = f"{COORDINATOR_URL}/v1/miners/{job_id}/result"
    request_headers = {
        "X-Api-Key": AUTH_TOKEN,
        "Content-Type": "application/json"
    }

    try:
        reply = httpx.post(result_url, json=result, headers=request_headers, timeout=10)
        if reply.status_code != 200:
            logger.error(f"Result submission failed: {reply.status_code} - {reply.text}")
        else:
            logger.info(f"Result submitted for job {job_id}")
    except Exception as exc:
        logger.error(f"Result submission error: {exc}")
|
||||
|
||||
def poll_for_jobs():
    """Ask the coordinator for the next job.

    Returns:
        The decoded JSON body on HTTP 200, or None on HTTP 204, on any other
        status (logged as an error) and on request errors (also logged).
    """
    request_headers = {
        "X-Api-Key": AUTH_TOKEN,
        "Content-Type": "application/json"
    }

    try:
        reply = httpx.post(
            f"{COORDINATOR_URL}/v1/miners/poll",
            json={"max_wait_seconds": 5},
            headers=request_headers,
            timeout=10
        )
        code = reply.status_code

        if code == 204:
            return None
        if code != 200:
            logger.error(f"Poll failed: {code} - {reply.text}")
            return None

        job = reply.json()
        logger.info(f"Received job: {job}")
        return job
    except Exception as exc:
        logger.error(f"Error polling for jobs: {exc}")
        return None
|
||||
|
||||
def main():
    """Main miner loop

    Verifies that a GPU and Ollama are available, waits for the coordinator,
    registers the miner and then alternates between heartbeats (every
    HEARTBEAT_INTERVAL seconds) and job polling (every 3 seconds) until
    interrupted. Exits with status 1 when a prerequisite is missing or the
    loop fails unexpectedly.
    """
    logger.info("Starting Real GPU Miner Client on Host...")

    # Check GPU availability
    gpu_info = get_gpu_info()
    if not gpu_info:
        logger.error("GPU not available, exiting")
        sys.exit(1)

    logger.info(f"GPU detected: {gpu_info['name']} ({gpu_info['memory_total']}MB)")

    # Check Ollama
    ollama_available, models = check_ollama()
    if not ollama_available:
        logger.error("Ollama not available - please install and start Ollama")
        sys.exit(1)

    logger.info(f"Ollama models available: {', '.join(models)}")

    # Wait for coordinator
    if not wait_for_coordinator():
        sys.exit(1)

    # Register with coordinator
    session_token = register_miner()
    if not session_token:
        logger.error("Failed to register, exiting")
        sys.exit(1)

    logger.info("Miner registered successfully, starting main loop...")

    # Main loop. Intervals are measured on the monotonic clock so wall-clock
    # adjustments cannot stall or burst heartbeats; -inf makes both actions
    # due on the first iteration regardless of the clock's starting value.
    poll_interval = 3
    last_heartbeat = float("-inf")
    last_poll = float("-inf")

    try:
        while True:
            current_time = time.monotonic()

            # Send heartbeat
            if current_time - last_heartbeat >= HEARTBEAT_INTERVAL:
                send_heartbeat()
                last_heartbeat = current_time

            # Poll for jobs
            if current_time - last_poll >= poll_interval:
                job = poll_for_jobs()
                if job:
                    # Execute the job with real GPU
                    execute_job(job, models)
                last_poll = current_time

            time.sleep(1)

    except KeyboardInterrupt:
        logger.info("Shutting down miner...")
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error in main loop: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
3
dev/gpu/gpu_miner_host_wrapper.sh
Executable file
3
dev/gpu/gpu_miner_host_wrapper.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/bash
# Wrapper script for GPU miner to ensure proper logging
#
# Replaces this shell with the project's virtualenv interpreter running the
# host miner, with unbuffered Python output (-u) and stderr merged into stdout.
# NOTE(review): the miner path points at scripts/gpu/ - confirm it matches
# where gpu_miner_host.py actually lives.
python_bin=/home/oib/windsurf/aitbc/.venv/bin/python
miner_script=/home/oib/windsurf/aitbc/scripts/gpu/gpu_miner_host.py

exec "${python_bin}" -u "${miner_script}" 2>&1
|
||||
72
dev/gpu/gpu_registry_demo.py
Normal file
72
dev/gpu/gpu_registry_demo.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple GPU Registry Server for demonstration
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, Any, Optional
|
||||
import uvicorn
|
||||
from datetime import datetime
|
||||
|
||||
# Application object; the route decorators below attach handlers to it.
app = FastAPI(title="GPU Registry Demo")

# In-memory storage: miner id -> registration record.
# Nothing is persisted, so a restart starts with an empty registry.
registered_gpus: Dict[str, Dict] = dict()
|
||||
|
||||
# Request body accepted by POST /miners/register.
class GPURegistration(BaseModel):
    # Free-form capability document supplied by the miner (required).
    capabilities: Dict[str, Any]
    # Number of jobs the miner is willing to run at once.
    concurrency: int = 1
    # Optional region label.
    region: Optional[str] = None
|
||||
|
||||
# Request body accepted by POST /miners/heartbeat; every field has a default.
class Heartbeat(BaseModel):
    # In-flight job count reported by the miner (not stored by the handler).
    inflight: int = 0
    # Status label copied onto the registry record.
    status: str = "ONLINE"
    # Arbitrary extra data copied onto the registry record.
    metadata: Dict[str, Any] = {}
|
||||
|
||||
@app.get("/")
async def root():
    """Return a banner message and the number of registered GPUs."""
    gpu_total = len(registered_gpus)
    return {"message": "GPU Registry Demo", "registered_gpus": gpu_total}
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe: always answers with a constant OK document."""
    return dict(status="ok")
|
||||
|
||||
@app.post("/miners/register")
async def register_gpu(miner_id: str, gpu_data: GPURegistration):
    """Register a GPU miner

    Stores (or overwrites) the record for `miner_id`, combining the submitted
    registration fields with the id and the registration/heartbeat times.

    Args:
        miner_id: Registry key for the miner.
        gpu_data: Registration body.

    Returns:
        A status document confirming the registration.
    """
    # One timestamp for both fields, so a fresh record never shows a
    # heartbeat that differs from its registration time.
    now = datetime.utcnow().isoformat()
    # pydantic v2 renamed .dict() to .model_dump(); use whichever exists so
    # the handler works on both major versions without deprecation warnings.
    dump = getattr(gpu_data, "model_dump", None) or gpu_data.dict
    registered_gpus[miner_id] = {
        "id": miner_id,
        "registered_at": now,
        "last_heartbeat": now,
        **dump()
    }
    return {"status": "ok", "message": f"GPU {miner_id} registered successfully"}
|
||||
|
||||
@app.post("/miners/heartbeat")
async def heartbeat(miner_id: str, heartbeat_data: Heartbeat):
    """Record a heartbeat for an already registered miner.

    Updates the record's last-heartbeat time, status and metadata.

    Raises:
        HTTPException: 404 when `miner_id` has not been registered.
    """
    record = registered_gpus.get(miner_id)
    if record is None:
        raise HTTPException(status_code=404, detail="GPU not registered")

    record.update(
        last_heartbeat=datetime.utcnow().isoformat(),
        status=heartbeat_data.status,
        metadata=heartbeat_data.metadata,
    )
    return {"status": "ok"}
|
||||
|
||||
@app.get("/miners/list")
async def list_gpus():
    """Return every registry record under the key "gpus"."""
    records = [record for record in registered_gpus.values()]
    return {"gpus": records}
|
||||
|
||||
@app.get("/miners/{miner_id}")
async def get_gpu(miner_id: str):
    """Return the registry record of one miner.

    Raises:
        HTTPException: 404 when `miner_id` has not been registered.
    """
    try:
        return registered_gpus[miner_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="GPU not registered") from None
|
||||
|
||||
if __name__ == "__main__":
    # Serve on all interfaces, port 8091; the banner shows the local URL.
    listen_port = 8091
    print("Starting GPU Registry Demo on http://localhost:8091")
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
||||
146
dev/gpu/integrate_gpu_exchange.py
Normal file
146
dev/gpu/integrate_gpu_exchange.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integrate GPU Miner with existing Trade Exchange
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration
# Base URL of the trade exchange frontend.
EXCHANGE_URL = "http://localhost:3002"
# Base URL of the GPU registry service.
GPU_REGISTRY_URL = "http://localhost:8091"
|
||||
|
||||
# Default location of the exchange frontend page that gets patched.
_EXCHANGE_INDEX_HTML = '/home/oib/windsurf/aitbc/apps/trade-exchange/index.html'


def update_exchange_with_gpu(html_path=_EXCHANGE_INDEX_HTML):
    """Update the exchange frontend to show registered GPUs

    Injects a <script> block before the last closing </body> tag of the
    exchange page. The script fetches the GPU registry listing and renders
    one card per GPU. The injection is idempotent: a page that already
    contains the script is left untouched.

    Args:
        html_path: Page to patch; defaults to the trade-exchange index.html.

    Returns:
        True when the page contains the integration afterwards (freshly
        injected or already present), False when no </body> tag was found.

    Raises:
        OSError: if the page cannot be read or written.
    """
    # Read the exchange HTML
    with open(html_path, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # Add GPU marketplace integration
    # NOTE(review): the injected script builds cards with innerHTML from
    # registry data (model, region, id) without escaping; anything a miner
    # registers is rendered as markup. Confirm the registry is trusted or
    # escape these fields.
    gpu_integration = """
    <script>
    // GPU Integration
    async function loadRealGPUOffers() {
        try {
            const response = await fetch('http://localhost:8091/miners/list');
            const data = await response.json();

            if (data.gpus && data.gpus.length > 0) {
                displayRealGPUOffers(data.gpus);
            } else {
                displayDemoOffers();
            }
        } catch (error) {
            console.log('Using demo GPU offers');
            displayDemoOffers();
        }
    }

    function displayRealGPUOffers(gpus) {
        const container = document.getElementById('gpuList');
        container.innerHTML = '';

        gpus.forEach(gpu => {
            const gpuCard = `
                <div class="bg-white rounded-lg shadow-lg p-6 card-hover">
                    <div class="flex justify-between items-start mb-4">
                        <h3 class="text-lg font-semibold">${gpu.capabilities.gpu.model}</h3>
                        <span class="bg-green-100 text-green-800 px-2 py-1 rounded text-sm">Available</span>
                    </div>
                    <div class="space-y-2 text-sm text-gray-600 mb-4">
                        <p><i data-lucide="monitor" class="w-4 h-4 inline mr-1"></i>Memory: ${gpu.capabilities.gpu.memory_gb} GB</p>
                        <p><i data-lucide="zap" class="w-4 h-4 inline mr-1"></i>CUDA: ${gpu.capabilities.gpu.cuda_version}</p>
                        <p><i data-lucide="cpu" class="w-4 h-4 inline mr-1"></i>Concurrency: ${gpu.concurrency}</p>
                        <p><i data-lucide="map-pin" class="w-4 h-4 inline mr-1"></i>Region: ${gpu.region}</p>
                    </div>
                    <div class="flex justify-between items-center">
                        <span class="text-2xl font-bold text-purple-600">50 AITBC/hr</span>
                        <button onclick="purchaseGPU('${gpu.id}')" class="bg-purple-600 text-white px-4 py-2 rounded hover:bg-purple-700 transition">
                            Purchase
                        </button>
                    </div>
                </div>
            `;
            container.innerHTML += gpuCard;
        });

        lucide.createIcons();
    }

    // Override the loadGPUOffers function
    const originalLoadGPUOffers = loadGPUOffers;
    loadGPUOffers = loadRealGPUOffers;
    </script>
    """

    # A second injection would duplicate the script and redeclare its `const`,
    # so a page that already carries it is left as it is.
    if 'function loadRealGPUOffers' in html_content:
        print("✅ Exchange already contains the GPU integration - nothing to do")
        return True

    # Insert before the (last) closing body tag only
    head, closing_tag, tail = html_content.rpartition('</body>')
    if not closing_tag:
        print("❌ Could not find </body> tag in exchange HTML")
        return False

    # Write back to file
    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(head + gpu_integration + closing_tag + tail)

    print("✅ Updated exchange with GPU integration!")
    return True
|
||||
|
||||
def create_gpu_api_endpoint():
    """Create an API endpoint in the exchange to serve GPU data

    Nothing is installed: the function only prints a code snippet that is
    meant to be pasted into simple_exchange_api.py by hand.
    """
    # The snippet uses the async httpx client (a blocking httpx.get inside an
    # `async def` handler would stall the event loop) and catches specific
    # exceptions instead of a bare `except`.
    api_code = """
@app.get("/api/gpu/offers")
async def get_gpu_offers():
    \"\"\"Get available GPU offers\"\"\"
    try:
        # Fetch from GPU registry
        async with httpx.AsyncClient(timeout=5) as client:
            response = await client.get("http://localhost:8091/miners/list")
        if response.status_code == 200:
            data = response.json()
            return {"offers": data.get("gpus", [])}
    except (httpx.HTTPError, ValueError):
        pass

    # Return demo data if registry not available
    return {
        "offers": [{
            "id": "demo-gpu-1",
            "model": "NVIDIA RTX 4060 Ti",
            "memory_gb": 16,
            "price_per_hour": 50,
            "available": True
        }]
    }
"""

    print("\n📝 To add GPU API endpoint to exchange, add this code to simple_exchange_api.py:")
    print(api_code)
|
||||
|
||||
def main():
    """Run the GPU/exchange integration and print the follow-up instructions.

    Calls ``update_exchange_with_gpu()`` and ``create_gpu_api_endpoint()`` in
    that order, then prints a fixed summary and the steps for viewing the
    marketplace.
    """
    print("🔗 Integrating GPU Miner with Trade Exchange...")

    # Frontend update first, then the backend snippet for the operator.
    update_exchange_with_gpu()
    create_gpu_api_endpoint()

    report = (
        "\n📊 Integration Summary:",
        "1. ✅ Exchange frontend updated to show real GPUs",
        "2. 📝 See above for API endpoint code",
        "3. 🌐 Access the exchange at: http://localhost:3002",
        "4. 🎯 GPU Registry available at: http://localhost:8091/miners/list",
        "\n🔄 To see the integrated GPU marketplace:",
        "1. Restart the trade exchange if needed:",
        " cd /home/oib/windsurf/aitbc/apps/trade-exchange",
        " python simple_exchange_api.py",
        "2. Open http://localhost:3002 in browser",
        "3. Click 'Browse GPU Marketplace'",
    )
    for message in report:
        print(message)


# Script entry point: run the integration only when executed directly.
if __name__ == "__main__":
    main()
|
||||
115
dev/gpu/miner_workflow.py
Normal file
115
dev/gpu/miner_workflow.py
Normal file
@@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Complete miner workflow - poll for jobs and assign proposer
|
||||
"""
|
||||
|
||||
import json
import os
import time
from datetime import datetime

import httpx
|
||||
|
||||
# Configuration
COORDINATOR_URL = "http://localhost:8001"
# The API key is read from the MINER_API_KEY environment variable. The previous
# value was the literal text "${MINER_API_KEY}", which Python never expands and
# which would have been sent verbatim in the X-Api-Key header.
MINER_API_KEY = os.environ.get("MINER_API_KEY", "")
MINER_ID = "localhost-gpu-miner"
|
||||
|
||||
def poll_and_accept_job():
    """Poll the coordinator for one job, simulate processing it, and submit a result.

    Returns:
        The value of ``job['job_id']`` when a job was received and its result
        was accepted with HTTP 200. ``None`` when no job is available
        (HTTP 204), when polling or submission returns another status, or when
        the coordinator cannot be reached.
    """
    print("🔍 Polling for jobs...")

    max_wait_seconds = 5
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": MINER_API_KEY,
    }

    try:
        # The poll request passes max_wait_seconds to the coordinator. Give the
        # client a longer timeout than that, otherwise httpx's default 5 s
        # timeout can expire while the coordinator is still holding the request.
        with httpx.Client(timeout=max_wait_seconds + 10) as client:
            # Poll for a job
            response = client.post(
                f"{COORDINATOR_URL}/v1/miners/poll",
                headers=headers,
                json={"max_wait_seconds": max_wait_seconds},
            )

            if response.status_code == 204:
                print("ℹ️ No jobs available")
                return None
            if response.status_code != 200:
                print(f"❌ Failed to poll: {response.status_code}")
                return None

            job = response.json()
            job_id = job['job_id']
            # Tolerate a job without a payload instead of raising KeyError.
            payload = job.get('payload') or {}
            print(f"✅ Received job: {job_id}")
            print(f" Task: {payload.get('task', 'unknown')}")

            # Simulate processing
            print("⚙️ Processing job...")
            time.sleep(2)

            # Submit result
            result_data = {
                "result": {
                    "status": "completed",
                    "output": f"Job {job_id} completed successfully",
                    "execution_time_ms": 2000,
                    "miner_id": MINER_ID,
                },
                "metrics": {
                    "compute_time": 2.0,
                    "energy_used": 0.1,
                },
            }

            print(f"📤 Submitting result for job {job_id}...")
            result_response = client.post(
                f"{COORDINATOR_URL}/v1/miners/{job_id}/result",
                headers=headers,
                json=result_data,
            )

            if result_response.status_code == 200:
                print("✅ Result submitted successfully!")
                return job_id

            print(f"❌ Failed to submit result: {result_response.status_code}")
            print(f" Response: {result_response.text}")
            return None
    except httpx.HTTPError as exc:
        # Connection refused, timeout, etc.: report it in the same style as
        # the other failures instead of crashing with a traceback.
        print(f"❌ Could not reach coordinator at {COORDINATOR_URL}: {exc}")
        return None
|
||||
|
||||
def check_block_proposer(job_id):
    """Find the explorer block whose hash equals ``job_id`` and print its details.

    Args:
        job_id: Value compared against each listed block's ``hash`` field.

    Returns:
        The matching block dict, or ``None`` when the request fails, the
        explorer answers with a non-200 status, or no listed block matches.
    """
    print(f"\n🔍 Checking proposer for job {job_id}...")

    try:
        with httpx.Client() as client:
            response = client.get(f"{COORDINATOR_URL}/v1/explorer/blocks")
    except httpx.HTTPError as exc:
        print(f"❌ Failed to fetch blocks: {exc}")
        return None

    if response.status_code != 200:
        # Previously this case returned None without any message.
        print(f"❌ Failed to fetch blocks: {response.status_code}")
        return None

    # A response without "items" is treated as an empty block list.
    for block in response.json().get('items', []):
        if block['hash'] == job_id:
            print("📦 Block Info:")
            print(f" Height: {block['height']}")
            print(f" Hash: {block['hash']}")
            print(f" Proposer: {block['proposer']}")
            print(f" Time: {block['timestamp']}")
            return block

    print(f"ℹ️ No block found for job {job_id}")
    return None
|
||||
|
||||
def main():
    """Run the miner workflow demo once.

    Prints a banner, polls for one job via ``poll_and_accept_job()``, and, when
    a job id comes back, waits one second and calls ``check_block_proposer()``
    with it. Otherwise prints a tip about creating a job first.
    """
    banner = (
        "⛏️ AITBC Miner Workflow Demo",
        f" Miner ID: {MINER_ID}",
        f" Coordinator: {COORDINATOR_URL}",
    )
    for line in banner:
        print(line)
    print()

    # Poll and accept a job
    accepted_job = poll_and_accept_job()

    if not accepted_job:
        print("\n💡 Tip: Create a job first using example_client_remote.py")
        return

    # Give the server a moment to update before looking the block up
    time.sleep(1)
    check_block_proposer(accepted_job)


# Script entry point: run the demo only when executed directly.
if __name__ == "__main__":
    main()
|
||||
32
dev/gpu/start_gpu_miner.sh
Executable file
32
dev/gpu/start_gpu_miner.sh
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/bin/bash
# Start GPU Miner Client
#
# Prints GPU status when nvidia-smi is installed, probes the coordinator
# health endpoint on localhost:9080, then replaces this shell with
# scripts/gpu/gpu_miner_host.py run from /home/oib/windsurf/aitbc.

echo "=== AITBC GPU Miner Client Startup ==="
echo "Starting GPU miner client..."
echo ""

# Show GPU info, or warn when the NVIDIA tooling is missing
if command -v nvidia-smi &> /dev/null; then
  echo "=== GPU Status ==="
  nvidia-smi --query-gpu=name,memory.used,memory.total,utilization.gpu,temperature.gpu --format=csv,noheader,nounits
  echo ""
else
  echo "WARNING: nvidia-smi not found, GPU may not be available"
fi

# Check if coordinator is running.
# -f makes an HTTP error status count as "not accessible"; --max-time keeps
# the probe from hanging if the port accepts connections but never answers.
echo "=== Checking Coordinator API ==="
if curl -sf --max-time 5 http://localhost:9080/health > /dev/null 2>&1; then
  echo "✓ Coordinator API is running on port 9080"
else
  echo "✗ Coordinator API is not accessible on port 9080"
  echo " The miner will wait for the coordinator to start..."
fi

echo ""
echo "=== Starting GPU Miner ==="
# The miner script path is relative, so abort if the project directory is
# missing rather than exec'ing python3 from the wrong working directory.
cd /home/oib/windsurf/aitbc || {
  echo "ERROR: cannot cd to /home/oib/windsurf/aitbc" >&2
  exit 1
}
exec python3 scripts/gpu/gpu_miner_host.py
|
||||
52
dev/gpu/start_gpu_miner.sh.example
Normal file
52
dev/gpu/start_gpu_miner.sh.example
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash

# AITBC GPU Miner Startup Script
# Copy to start_gpu_miner.sh and adjust variables for your environment
#
# Flow: verify Ollama is reachable, report the GPU (if any), register this
# miner with the coordinator, then send a heartbeat every 10 seconds.

set -e

# === CONFIGURE THESE ===
COORDINATOR_URL="http://YOUR_COORDINATOR_IP:18000"
MINER_API_KEY="your_miner_api_key"
OLLAMA_HOST="http://127.0.0.1:11434"
GPU_ID="gpu-0"

echo "🔧 Starting AITBC GPU Miner"
echo "Coordinator: $COORDINATOR_URL"
echo "Ollama: $OLLAMA_HOST"
echo ""

# Check Ollama is running
if ! curl -s "$OLLAMA_HOST/api/tags" > /dev/null 2>&1; then
  echo "❌ Ollama not running at $OLLAMA_HOST"
  echo "Start it with: ollama serve"
  exit 1
fi

echo "✅ Ollama is running"

# Check GPU
if command -v nvidia-smi &> /dev/null; then
  echo "GPU detected:"
  nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
else
  echo "⚠️ No NVIDIA GPU detected (CPU-only mode)"
fi

# Register miner.
# -f turns an HTTP error status (for example a rejected API key) into a
# non-zero exit, so success is no longer reported for a failed registration.
echo ""
echo "Registering miner with coordinator..."
if ! curl -sf --max-time 10 -X POST "$COORDINATOR_URL/v1/miners/register" \
  -H "X-Api-Key: $MINER_API_KEY" \
  -H "Content-Type: application/json" \
  -d "{\"gpu_id\": \"$GPU_ID\", \"ollama_url\": \"$OLLAMA_HOST\"}"; then
  echo ""
  echo "❌ Miner registration failed at $COORDINATOR_URL" >&2
  exit 1
fi

echo ""
echo "✅ Miner registered. Starting heartbeat loop..."

# Heartbeat loop (this script only sends heartbeats; it does not poll for jobs).
while true; do
  # Run curl inside `if` so that a failed heartbeat is reported and retried
  # instead of silently terminating the script under `set -e`.
  if ! curl -sf --max-time 10 -X POST "$COORDINATOR_URL/v1/miners/heartbeat" \
    -H "X-Api-Key: $MINER_API_KEY" > /dev/null 2>&1; then
    echo "⚠️ Heartbeat to $COORDINATOR_URL failed; retrying in 10s" >&2
  fi
  sleep 10
done
|
||||
Reference in New Issue
Block a user