```

chore: enhance .gitignore and remove obsolete documentation files - Reorganize .gitignore with categorized sections for better maintainability - Add comprehensive ignore patterns for Python, Node.js, databases, logs, and build artifacts - Add project-specific ignore rules for coordinator, explorer, and deployment files - Remove outdated documentation: BITCOIN-WALLET-SETUP.md, LOCAL_ASSETS_SUMMARY.md, README-CONTAINER-DEPLOYMENT.md, README-DOMAIN-DEPLOYMENT.md ```
2026-01-24 14:44:51 +01:00
parent 99bf335970
commit 9b9c5beb23
214 changed files with 25558 additions and 171 deletions
--- a/scripts/gpu/gpu_miner_host.py
+++ b/scripts/gpu/gpu_miner_host.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+"""
+Real GPU Miner Client for AITBC - runs on host with actual GPU
+"""
+
+import json
+import time
+import httpx
+import logging
+import sys
+import subprocess
+import os
+from datetime import datetime
+
+# Configuration
+COORDINATOR_URL = "http://127.0.0.1:18000"
+MINER_ID = "REDACTED_MINER_KEY"
+AUTH_TOKEN = "REDACTED_MINER_KEY"
+HEARTBEAT_INTERVAL = 15
+MAX_RETRIES = 10
+RETRY_DELAY = 30
+
+# Setup logging with explicit configuration
+LOG_PATH = "/home/oib/windsurf/aitbc/logs/host_gpu_miner.log"
+os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
+
+class FlushHandler(logging.StreamHandler):
+    def emit(self, record):
+        super().emit(record)
+        self.flush()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        FlushHandler(sys.stdout),
+        logging.FileHandler(LOG_PATH)
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# Force stdout to be unbuffered
+sys.stdout.reconfigure(line_buffering=True)
+sys.stderr.reconfigure(line_buffering=True)
+
+# GPU capabilities (RTX 4060 Ti)
+GPU_CAPABILITIES = {
+    "gpu": {
+        "model": "NVIDIA GeForce RTX 4060 Ti",
+        "memory_gb": 16,
+        "cuda_version": "12.4",
+        "platform": "CUDA",
+        "supported_tasks": ["inference", "training", "stable-diffusion", "llama"],
+        "max_concurrent_jobs": 1
+    }
+}
+
+def get_gpu_info():
+    """Get real GPU information"""
+    try:
+        result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu', 
+                               '--format=csv,noheader,nounits'], 
+                              capture_output=True, text=True, timeout=5)
+        if result.returncode == 0:
+            info = result.stdout.strip().split(', ')
+            return {
+                "name": info[0],
+                "memory_total": int(info[1]),
+                "memory_used": int(info[2]),
+                "utilization": int(info[3])
+            }
+    except Exception as e:
+        logger.error(f"Failed to get GPU info: {e}")
+    return None
+
+def check_ollama():
+    """Check if Ollama is running and has models"""
+    try:
+        response = httpx.get("http://localhost:11434/api/tags", timeout=5)
+        if response.status_code == 200:
+            models = response.json().get('models', [])
+            model_names = [m['name'] for m in models]
+            logger.info(f"Ollama running with models: {model_names}")
+            return True, model_names
+        else:
+            logger.error("Ollama not responding")
+            return False, []
+    except Exception as e:
+        logger.error(f"Ollama check failed: {e}")
+        return False, []
+
+def wait_for_coordinator():
+    """Wait for coordinator to be available"""
+    for i in range(MAX_RETRIES):
+        try:
+            response = httpx.get(f"{COORDINATOR_URL}/v1/health", timeout=5)
+            if response.status_code == 200:
+                logger.info("Coordinator is available!")
+                return True
+        except:
+            pass
+        
+        logger.info(f"Waiting for coordinator... ({i+1}/{MAX_RETRIES})")
+        time.sleep(RETRY_DELAY)
+    
+    logger.error("Coordinator not available after max retries")
+    return False
+
+def register_miner():
+    """Register the miner with the coordinator"""
+    register_data = {
+        "capabilities": GPU_CAPABILITIES,
+        "concurrency": 1,
+        "region": "localhost"
+    }
+    
+    headers = {
+        "X-Api-Key": AUTH_TOKEN,
+        "Content-Type": "application/json"
+    }
+    
+    try:
+        response = httpx.post(
+            f"{COORDINATOR_URL}/v1/miners/register?miner_id={MINER_ID}",
+            json=register_data,
+            headers=headers,
+            timeout=10
+        )
+        
+        if response.status_code == 200:
+            data = response.json()
+            logger.info(f"Successfully registered miner: {data}")
+            return data.get("session_token", "demo-token")
+        else:
+            logger.error(f"Registration failed: {response.status_code} - {response.text}")
+            return None
+            
+    except Exception as e:
+        logger.error(f"Registration error: {e}")
+        return None
+
+def send_heartbeat():
+    """Send heartbeat to coordinator with real GPU stats"""
+    gpu_info = get_gpu_info()
+    
+    if gpu_info:
+        heartbeat_data = {
+            "status": "active",
+            "current_jobs": 0,
+            "last_seen": datetime.utcnow().isoformat(),
+            "gpu_utilization": gpu_info["utilization"],
+            "memory_used": gpu_info["memory_used"],
+            "memory_total": gpu_info["memory_total"]
+        }
+    else:
+        heartbeat_data = {
+            "status": "active",
+            "current_jobs": 0,
+            "last_seen": datetime.utcnow().isoformat(),
+            "gpu_utilization": 0,
+            "memory_used": 0,
+        }
+    
+    headers = {
+        "X-Api-Key": AUTH_TOKEN,
+        "Content-Type": "application/json"
+    }
+    
+    try:
+        response = httpx.post(
+            f"{COORDINATOR_URL}/v1/miners/heartbeat?miner_id={MINER_ID}",
+            json=heartbeat_data,
+            headers=headers,
+            timeout=5
+        )
+        
+        if response.status_code == 200:
+            logger.info(f"Heartbeat sent (GPU: {gpu_info['utilization'] if gpu_info else 'N/A'}%)")
+        else:
+            logger.error(f"Heartbeat failed: {response.status_code} - {response.text}")
+            
+    except Exception as e:
+        logger.error(f"Heartbeat error: {e}")
+
+def execute_job(job, available_models):
+    """Execute a job using real GPU resources"""
+    job_id = job.get('job_id')
+    payload = job.get('payload', {})
+    
+    logger.info(f"Executing job {job_id}: {payload}")
+    
+    try:
+        if payload.get('type') == 'inference':
+            # Get the prompt and model
+            prompt = payload.get('prompt', '')
+            model = payload.get('model', 'llama3.2:latest')
+            
+            # Check if model is available
+            if model not in available_models:
+                # Use first available model
+                if available_models:
+                    model = available_models[0]
+                    logger.info(f"Using available model: {model}")
+                else:
+                    raise Exception("No models available in Ollama")
+            
+            # Call Ollama API for real GPU inference
+            logger.info(f"Running inference on GPU with model: {model}")
+            start_time = time.time()
+            
+            ollama_response = httpx.post(
+                "http://localhost:11434/api/generate",
+                json={
+                    "model": model,
+                    "prompt": prompt,
+                    "stream": False
+                },
+                timeout=60
+            )
+            
+            if ollama_response.status_code == 200:
+                result = ollama_response.json()
+                output = result.get('response', '')
+                execution_time = time.time() - start_time
+                
+                # Get GPU stats after execution
+                gpu_after = get_gpu_info()
+                
+                # Submit result back to coordinator
+                submit_result(job_id, {
+                    "result": {
+                        "status": "completed",
+                        "output": output,
+                        "model": model,
+                        "tokens_processed": result.get('eval_count', 0),
+                        "execution_time": execution_time,
+                        "gpu_used": True
+                    },
+                    "metrics": {
+                        "gpu_utilization": gpu_after["utilization"] if gpu_after else 0,
+                        "memory_used": gpu_after["memory_used"] if gpu_after else 0,
+                        "memory_peak": max(gpu_after["memory_used"] if gpu_after else 0, 2048)
+                    }
+                })
+                
+                logger.info(f"Job {job_id} completed in {execution_time:.2f}s")
+                return True
+            else:
+                logger.error(f"Ollama error: {ollama_response.status_code}")
+                submit_result(job_id, {
+                    "result": {
+                        "status": "failed",
+                        "error": f"Ollama error: {ollama_response.text}"
+                    }
+                })
+                return False
+        else:
+            # Unsupported job type
+            logger.error(f"Unsupported job type: {payload.get('type')}")
+            submit_result(job_id, {
+                "result": {
+                    "status": "failed",
+                    "error": f"Unsupported job type: {payload.get('type')}"
+                }
+            })
+            return False
+            
+    except Exception as e:
+        logger.error(f"Job execution error: {e}")
+        submit_result(job_id, {
+            "result": {
+                "status": "failed",
+                "error": str(e)
+            }
+        })
+        return False
+
+def submit_result(job_id, result):
+    """Submit job result to coordinator"""
+    headers = {
+        "X-Api-Key": AUTH_TOKEN,
+        "Content-Type": "application/json"
+    }
+    
+    try:
+        response = httpx.post(
+            f"{COORDINATOR_URL}/v1/miners/{job_id}/result",
+            json=result,
+            headers=headers,
+            timeout=10
+        )
+        
+        if response.status_code == 200:
+            logger.info(f"Result submitted for job {job_id}")
+        else:
+            logger.error(f"Result submission failed: {response.status_code} - {response.text}")
+            
+    except Exception as e:
+        logger.error(f"Result submission error: {e}")
+
+def poll_for_jobs():
+    """Poll for available jobs"""
+    poll_data = {
+        "max_wait_seconds": 5
+    }
+    
+    headers = {
+        "X-Api-Key": AUTH_TOKEN,
+        "Content-Type": "application/json"
+    }
+    
+    try:
+        response = httpx.post(
+            f"{COORDINATOR_URL}/v1/miners/poll",
+            json=poll_data,
+            headers=headers,
+            timeout=10
+        )
+        
+        if response.status_code == 200:
+            job = response.json()
+            logger.info(f"Received job: {job}")
+            return job
+        elif response.status_code == 204:
+            return None
+        else:
+            logger.error(f"Poll failed: {response.status_code} - {response.text}")
+            return None
+            
+    except Exception as e:
+        logger.error(f"Error polling for jobs: {e}")
+        return None
+
+def main():
+    """Main miner loop"""
+    logger.info("Starting Real GPU Miner Client on Host...")
+    
+    # Check GPU availability
+    gpu_info = get_gpu_info()
+    if not gpu_info:
+        logger.error("GPU not available, exiting")
+        sys.exit(1)
+    
+    logger.info(f"GPU detected: {gpu_info['name']} ({gpu_info['memory_total']}MB)")
+    
+    # Check Ollama
+    ollama_available, models = check_ollama()
+    if not ollama_available:
+        logger.error("Ollama not available - please install and start Ollama")
+        sys.exit(1)
+
+    logger.info(f"Ollama models available: {', '.join(models)}")
+    
+    # Wait for coordinator
+    if not wait_for_coordinator():
+        sys.exit(1)
+    
+    # Register with coordinator
+    session_token = register_miner()
+    if not session_token:
+        logger.error("Failed to register, exiting")
+        sys.exit(1)
+    
+    logger.info("Miner registered successfully, starting main loop...")
+    
+    # Main loop
+    last_heartbeat = 0
+    last_poll = 0
+    
+    try:
+        while True:
+            current_time = time.time()
+            
+            # Send heartbeat
+            if current_time - last_heartbeat >= HEARTBEAT_INTERVAL:
+                send_heartbeat()
+                last_heartbeat = current_time
+            
+            # Poll for jobs
+            if current_time - last_poll >= 3:
+                job = poll_for_jobs()
+                if job:
+                    # Execute the job with real GPU
+                    execute_job(job, models)
+                last_poll = current_time
+            
+            time.sleep(1)
+            
+    except KeyboardInterrupt:
+        logger.info("Shutting down miner...")
+    except Exception as e:
+        logger.error(f"Error in main loop: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()