Update database paths and fix foreign key references across coordinator API

- Change SQLite database path from `/home/oib/windsurf/aitbc/data/` to `/opt/data/`
- Fix foreign key references to use correct table names (users, wallets, gpu_registry)
- Replace governance router with new governance and community routers
- Add multi-modal RL router to main application
- Simplify DEPLOYMENT_READINESS_REPORT.md to focus on production deployment status
- Update governance router with decentralized DAO voting
This commit is contained in:
oib
2026-02-26 19:32:06 +01:00
parent 1e2ea0bb9d
commit 7bb2905cca
89 changed files with 38245 additions and 1260 deletions

View File

@@ -0,0 +1,576 @@
"""
Marketplace GPU Resource Optimizer
Optimizes GPU acceleration and resource utilization specifically for marketplace AI power trading
"""
import asyncio
import json
import logging
import multiprocessing
import os
import sys
import threading
import time
import uuid
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple

import numpy as np
# Try to import pycuda, fallback if not available
try:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
CUDA_AVAILABLE = True
except ImportError:
CUDA_AVAILABLE = False
print("Warning: PyCUDA not available. GPU optimization will run in simulation mode.")
logger = logging.getLogger(__name__)
class MarketplaceGPUOptimizer:
    """Optimizes GPU resources for marketplace AI power trading.

    Maintains a fleet of GPU devices (real via PyCUDA, or simulated when CUDA
    is unavailable), a per-device best-fit/scatter memory allocator, and
    priority-aware job scheduling with preemption and on-demand
    defragmentation.
    """

    def __init__(self, simulation_mode: bool = not CUDA_AVAILABLE):
        # When True, devices and their telemetry are synthetic.
        self.simulation_mode = simulation_mode
        self.gpu_devices: List[Dict[str, Any]] = []
        # gpu_id -> allocator state: {'allocated_blocks', 'free_blocks', 'fragmentation'}
        self.gpu_memory_pools: Dict[int, Dict[str, Any]] = {}
        # job_id -> allocation record for currently running jobs
        self.active_jobs: Dict[str, Dict[str, Any]] = {}
        self.resource_metrics = {
            'total_utilization': 0.0,
            'memory_utilization': 0.0,
            'compute_utilization': 0.0,
            'energy_efficiency': 0.0,
            'jobs_processed': 0,
            'failed_jobs': 0
        }
        # Optimization configuration
        self.config = {
            'memory_fragmentation_threshold': 0.15,  # defragment above 15%
            'dynamic_batching_enabled': True,
            'max_batch_size': 128,
            'idle_power_state': 'P8',
            'active_power_state': 'P0',
            'thermal_throttle_threshold': 85.0  # Celsius
        }
        # (fix) RLock instead of Lock: preemption re-enters locking methods on
        # the same thread (_preempt_low_priority_jobs -> release_resources);
        # a non-reentrant Lock would deadlock on that re-acquisition.
        self.lock = threading.RLock()
        self._initialize_gpu_devices()

    def _initialize_gpu_devices(self):
        """Initialize available GPU devices (simulated, or real via PyCUDA)."""
        if self.simulation_mode:
            # Create simulated GPUs
            self.gpu_devices = [
                {
                    'id': 0,
                    'name': 'Simulated RTX 4090',
                    'total_memory': 24 * 1024 * 1024 * 1024,  # 24GB
                    'free_memory': 24 * 1024 * 1024 * 1024,
                    'compute_capability': (8, 9),
                    'utilization': 0.0,
                    'temperature': 45.0,
                    'power_draw': 30.0,
                    'power_limit': 450.0,
                    'status': 'idle'
                },
                {
                    'id': 1,
                    'name': 'Simulated RTX 4090',
                    'total_memory': 24 * 1024 * 1024 * 1024,
                    'free_memory': 24 * 1024 * 1024 * 1024,
                    'compute_capability': (8, 9),
                    'utilization': 0.0,
                    'temperature': 42.0,
                    'power_draw': 28.0,
                    'power_limit': 450.0,
                    'status': 'idle'
                }
            ]
            logger.info(f"Initialized {len(self.gpu_devices)} simulated GPU devices")
        else:
            try:
                # Initialize real GPUs via PyCUDA
                num_devices = cuda.Device.count()
                for i in range(num_devices):
                    dev = cuda.Device(i)
                    # NOTE(review): cuda.mem_get_info() reports the *current*
                    # context's device, not necessarily device i -- confirm
                    # against PyCUDA docs if multi-GPU accuracy matters.
                    free_mem, total_mem = cuda.mem_get_info()
                    self.gpu_devices.append({
                        'id': i,
                        'name': dev.name(),
                        'total_memory': total_mem,
                        'free_memory': free_mem,
                        'compute_capability': dev.compute_capability(),
                        'utilization': 0.0,  # Would need NVML for real utilization
                        'temperature': 0.0,  # Would need NVML
                        'power_draw': 0.0,   # Would need NVML
                        'power_limit': 0.0,  # Would need NVML
                        'status': 'idle'
                    })
                logger.info(f"Initialized {len(self.gpu_devices)} real GPU devices")
            except Exception as e:
                logger.error(f"Error initializing GPUs: {e}")
                self.simulation_mode = True
                self._initialize_gpu_devices()  # Fallback to simulation
        # Initialize memory pools for each device (idempotent: the fallback
        # recursion above may run this loop twice with identical results).
        for gpu in self.gpu_devices:
            self.gpu_memory_pools[gpu['id']] = {
                'allocated_blocks': [],
                'free_blocks': [{'start': 0, 'size': gpu['total_memory']}],
                'fragmentation': 0.0
            }

    async def optimize_resource_allocation(self, job_requirements: Dict[str, Any]) -> Dict[str, Any]:
        """
        Optimize GPU resource allocation for a new marketplace job.

        Returns the allocation plan, or a rejection/queued dict if resources
        are unavailable even after defragmentation and preemption.
        """
        required_memory = job_requirements.get('memory_bytes', 1024 * 1024 * 1024)  # Default 1GB
        required_compute = job_requirements.get('compute_units', 1.0)
        max_latency = job_requirements.get('max_latency_ms', 1000)  # currently informational only
        priority = job_requirements.get('priority', 1)  # 1 (low) to 10 (high)
        with self.lock:
            # 1. Find optimal GPU
            best_gpu_id = -1
            best_score = -1.0
            for gpu in self.gpu_devices:
                # Check constraints
                if gpu['free_memory'] < required_memory:
                    continue
                if gpu['temperature'] > self.config['thermal_throttle_threshold'] and priority < 8:
                    continue  # Reserve hot GPUs for high priority only
                # Optimization score (higher is better): balance load while
                # minimizing fragmentation; penalize heat and current load.
                mem_utilization = 1.0 - (gpu['free_memory'] / gpu['total_memory'])
                comp_utilization = gpu['utilization']
                score = 100.0
                score -= (comp_utilization * 40.0)
                score -= ((gpu['temperature'] - 40.0) * 1.5)
                # Memory fit score: tighter fit is better to reduce fragmentation
                mem_fit_ratio = required_memory / gpu['free_memory']
                score += (mem_fit_ratio * 20.0)
                if score > best_score:
                    best_score = score
                    best_gpu_id = gpu['id']
            if best_gpu_id != -1:
                # 2. Allocate resources on best GPU.
                # (fix) uuid.uuid4 -- the original called a never-imported uuid4().
                job_id = job_requirements['job_id'] if 'job_id' in job_requirements else f"job_{uuid.uuid4().hex[:8]}"
                allocation = self._allocate_memory(best_gpu_id, required_memory, job_id)
                if not allocation['success']:
                    return {
                        'success': False,
                        'reason': 'Memory allocation failed due to fragmentation',
                        'queued': True
                    }
                # 3. Update device and job state
                for i, gpu in enumerate(self.gpu_devices):
                    if gpu['id'] == best_gpu_id:
                        self.gpu_devices[i]['free_memory'] -= required_memory
                        self.gpu_devices[i]['utilization'] = min(1.0, self.gpu_devices[i]['utilization'] + (required_compute * 0.1))
                        self.gpu_devices[i]['status'] = 'active'
                        break
                self.active_jobs[job_id] = {
                    'gpu_id': best_gpu_id,
                    'memory_allocated': required_memory,
                    'compute_allocated': required_compute,
                    'priority': priority,
                    'start_time': time.time(),
                    'status': 'running'
                }
                self._update_metrics()
                return {
                    'success': True,
                    'job_id': job_id,
                    'gpu_id': best_gpu_id,
                    'allocation_plan': {
                        'memory_blocks': allocation['blocks'],
                        'dynamic_batching': self.config['dynamic_batching_enabled'],
                        'power_state_enforced': self.config['active_power_state']
                    },
                    'estimated_completion_ms': int(required_compute * 100)
                }
        # (fix) No GPU could take the job. Run recovery strategies *outside*
        # the lock: both paths re-enter locking methods (and the original
        # awaited and recursed while still holding the lock, which deadlocked
        # with a plain Lock and stalled other schedulers regardless).
        if await self._attempt_memory_defragmentation():
            return await self.optimize_resource_allocation(job_requirements)
        if await self._preempt_low_priority_jobs(priority, required_memory):
            return await self.optimize_resource_allocation(job_requirements)
        return {
            'success': False,
            'reason': 'Insufficient GPU resources available even after optimization',
            'queued': True,
            'estimated_wait_ms': 5000
        }

    def _allocate_memory(self, gpu_id: int, size: int, job_id: str) -> Dict[str, Any]:
        """Custom memory allocator designed to minimize fragmentation.

        Tries a contiguous best-fit allocation first; falls back to scatter
        allocation across several free blocks when only fragmented space
        remains. Returns {'success': bool, 'blocks': [...]} (plus
        'fragmented': True for scatter allocations).
        """
        pool = self.gpu_memory_pools[gpu_id]
        # Sort free blocks by size (Best Fit algorithm)
        pool['free_blocks'].sort(key=lambda x: x['size'])
        allocated_blocks = []
        remaining_size = size
        # Try contiguous allocation first (Best Fit: smallest block that fits)
        for i, block in enumerate(pool['free_blocks']):
            if block['size'] >= size:
                allocated_block = {
                    'job_id': job_id,
                    'start': block['start'],
                    'size': size
                }
                allocated_blocks.append(allocated_block)
                pool['allocated_blocks'].append(allocated_block)
                # Shrink (or consume) the free block we carved from
                if block['size'] == size:
                    pool['free_blocks'].pop(i)
                else:
                    block['start'] += size
                    block['size'] -= size
                self._recalculate_fragmentation(gpu_id)
                return {'success': True, 'blocks': allocated_blocks}
        # No single block fits: scatter allocation (virtual memory mapping).
        # Less performant, but prevents OOM on fragmented memory.
        if sum(b['size'] for b in pool['free_blocks']) >= size:
            blocks_to_remove = []
            for i, block in enumerate(pool['free_blocks']):
                if remaining_size <= 0:
                    break
                take_size = min(block['size'], remaining_size)
                allocated_block = {
                    'job_id': job_id,
                    'start': block['start'],
                    'size': take_size
                }
                allocated_blocks.append(allocated_block)
                pool['allocated_blocks'].append(allocated_block)
                if take_size == block['size']:
                    blocks_to_remove.append(i)
                else:
                    block['start'] += take_size
                    block['size'] -= take_size
                remaining_size -= take_size
            # Remove fully consumed free blocks (reverse order keeps indices valid)
            for i in reversed(blocks_to_remove):
                pool['free_blocks'].pop(i)
            self._recalculate_fragmentation(gpu_id)
            return {'success': True, 'blocks': allocated_blocks, 'fragmented': True}
        return {'success': False}

    def release_resources(self, job_id: str) -> bool:
        """Release memory/compute held by a job; returns False if unknown."""
        with self.lock:
            if job_id not in self.active_jobs:
                return False
            job = self.active_jobs[job_id]
            gpu_id = job['gpu_id']
            pool = self.gpu_memory_pools[gpu_id]
            # Return this job's blocks to the free list
            blocks_to_free = []
            new_allocated = []
            for block in pool['allocated_blocks']:
                if block['job_id'] == job_id:
                    blocks_to_free.append({'start': block['start'], 'size': block['size']})
                else:
                    new_allocated.append(block)
            pool['allocated_blocks'] = new_allocated
            pool['free_blocks'].extend(blocks_to_free)
            self._merge_free_blocks(gpu_id)
            # Update GPU state
            for i, gpu in enumerate(self.gpu_devices):
                if gpu['id'] == gpu_id:
                    self.gpu_devices[i]['free_memory'] += job['memory_allocated']
                    self.gpu_devices[i]['utilization'] = max(0.0, self.gpu_devices[i]['utilization'] - (job['compute_allocated'] * 0.1))
                    if self.gpu_devices[i]['utilization'] <= 0.05:
                        self.gpu_devices[i]['status'] = 'idle'
                    break
            # Update metrics
            self.resource_metrics['jobs_processed'] += 1
            if job['status'] == 'failed':
                self.resource_metrics['failed_jobs'] += 1
            del self.active_jobs[job_id]
            self._update_metrics()
            return True

    def _merge_free_blocks(self, gpu_id: int):
        """Merge adjacent free memory blocks to reduce fragmentation."""
        pool = self.gpu_memory_pools[gpu_id]
        if len(pool['free_blocks']) <= 1:
            return
        # Sort by start address so adjacency can be detected pairwise
        pool['free_blocks'].sort(key=lambda x: x['start'])
        merged = [pool['free_blocks'][0]]
        for current in pool['free_blocks'][1:]:
            previous = merged[-1]
            if previous['start'] + previous['size'] == current['start']:
                previous['size'] += current['size']
            else:
                merged.append(current)
        pool['free_blocks'] = merged
        self._recalculate_fragmentation(gpu_id)

    def _recalculate_fragmentation(self, gpu_id: int):
        """Recompute the memory fragmentation index (0.0 contiguous .. 1.0)."""
        pool = self.gpu_memory_pools[gpu_id]
        if not pool['free_blocks']:
            pool['fragmentation'] = 0.0
            return
        total_free = sum(b['size'] for b in pool['free_blocks'])
        if total_free == 0:
            pool['fragmentation'] = 0.0
            return
        max_block = max(b['size'] for b in pool['free_blocks'])
        # High when the largest free block is much smaller than total free memory
        pool['fragmentation'] = 1.0 - (max_block / total_free)

    async def _attempt_memory_defragmentation(self) -> bool:
        """Compact allocations on GPUs whose fragmentation exceeds the threshold.

        In a real scenario this would pause kernels and use
        cudaMemcpyDeviceToDevice; here we simulate a perfect compaction.
        Returns True if any device was defragmented.
        """
        defrag_occurred = False
        for gpu_id, pool in self.gpu_memory_pools.items():
            if pool['fragmentation'] > self.config['memory_fragmentation_threshold']:
                logger.info(f"Defragmenting GPU {gpu_id} (frag: {pool['fragmentation']:.2f})")
                await asyncio.sleep(0.1)  # Simulate defrag time
                total_allocated = sum(b['size'] for b in pool['allocated_blocks'])
                # Repack every allocation contiguously from address 0
                new_allocated = []
                current_ptr = 0
                for block in pool['allocated_blocks']:
                    new_allocated.append({
                        'job_id': block['job_id'],
                        'start': current_ptr,
                        'size': block['size']
                    })
                    current_ptr += block['size']
                pool['allocated_blocks'] = new_allocated
                gpu = next((g for g in self.gpu_devices if g['id'] == gpu_id), None)
                if gpu:
                    # All free space is now one contiguous tail block
                    pool['free_blocks'] = [{
                        'start': total_allocated,
                        'size': gpu['total_memory'] - total_allocated
                    }]
                    pool['fragmentation'] = 0.0
                    defrag_occurred = True
        return defrag_occurred

    async def schedule_job(self, job_id: str, priority: int, memory_required: int, computation_complexity: float) -> bool:
        """Dynamic priority scheduling: place a job, preempting lower-priority
        jobs when no GPU currently has enough free memory.

        Returns True if the job was allocated and started, False otherwise.
        """
        job_data = {
            'job_id': job_id,
            'priority': priority,
            'memory_required': memory_required,
            'computation_complexity': computation_complexity,
            # (fix) release_resources() reads 'compute_allocated'; the original
            # never set it, so releasing a scheduled job raised KeyError.
            'compute_allocated': computation_complexity,
            'status': 'queued',
            'submitted_at': datetime.utcnow().isoformat()
        }

        def _try_allocate() -> bool:
            """Pick the GPU with the most relative free memory and allocate there."""
            with self.lock:
                best_gpu = -1
                best_score = -float('inf')
                # (fix) the original iterated a non-existent self.gpu_status and
                # read pool['total_memory'] / pool['allocated_memory'], keys the
                # pools never had; use the device records instead.
                for gpu in self.gpu_devices:
                    if gpu['free_memory'] >= memory_required:
                        score = (gpu['free_memory'] / gpu['total_memory']) * 100
                        if score > best_score:
                            best_score = score
                            best_gpu = gpu['id']
                if best_gpu < 0:
                    return False
                alloc_result = self._allocate_memory(best_gpu, memory_required, job_id)
                if not alloc_result['success']:
                    return False
                # Mirror optimize_resource_allocation's bookkeeping so that
                # release_resources() later restores a consistent state.
                for gpu in self.gpu_devices:
                    if gpu['id'] == best_gpu:
                        gpu['free_memory'] -= memory_required
                        gpu['status'] = 'active'
                        break
                job_data['status'] = 'running'
                job_data['gpu_id'] = best_gpu
                job_data['memory_allocated'] = memory_required
                self.active_jobs[job_id] = job_data
                return True

        # Direct allocation if some GPU has enough free memory
        if _try_allocate():
            return True
        # Otherwise try to preempt lower priority jobs (outside the lock:
        # preemption calls release_resources, which locks internally).
        logger.info(f"No GPU has {memory_required} bytes free for job {job_id}. Attempting preemption...")
        if await self._preempt_low_priority_jobs(priority, memory_required) and _try_allocate():
            return True
        logger.warning(f"Job {job_id} remains queued. Insufficient resources even after preemption.")
        return False

    async def _preempt_low_priority_jobs(self, incoming_priority: int, required_memory: int) -> bool:
        """Preempt lower priority jobs to make room for higher priority ones.

        Frees the smallest set of strictly-lower-priority jobs whose combined
        memory covers required_memory. Returns True only if enough memory
        could actually be freed (nothing is preempted otherwise).
        """
        preemptable_jobs = []
        for job_id, job in self.active_jobs.items():
            if job['priority'] < incoming_priority:
                preemptable_jobs.append((job_id, job))
        # Sort by priority (lowest first) then memory (largest first)
        preemptable_jobs.sort(key=lambda x: (x[1]['priority'], -x[1]['memory_allocated']))
        freed_memory = 0
        jobs_to_preempt = []
        for job_id, job in preemptable_jobs:
            jobs_to_preempt.append(job_id)
            freed_memory += job['memory_allocated']
            if freed_memory >= required_memory:
                break
        if freed_memory >= required_memory:
            for job_id in jobs_to_preempt:
                logger.info(f"Preempting low priority job {job_id} for higher priority request")
                # In real scenario, would save state/checkpoint before killing
                self.release_resources(job_id)
                # Notify job owner (simulated)
                # event_bus.publish('job_preempted', {'job_id': job_id})
            return True
        return False

    def _update_metrics(self):
        """Update fleet-wide utilization/efficiency metrics (and simulated telemetry)."""
        total_util = 0.0
        total_mem_util = 0.0
        for gpu in self.gpu_devices:
            mem_util = 1.0 - (gpu['free_memory'] / gpu['total_memory'])
            total_mem_util += mem_util
            total_util += gpu['utilization']
            # Simulated devices: low-pass filter temperature/power toward a
            # utilization-driven target so readings change smoothly.
            if self.simulation_mode:
                target_temp = 35.0 + (gpu['utilization'] * 50.0)
                gpu['temperature'] = gpu['temperature'] * 0.9 + target_temp * 0.1
                target_power = 20.0 + (gpu['utilization'] * (gpu['power_limit'] - 20.0))
                gpu['power_draw'] = gpu['power_draw'] * 0.8 + target_power * 0.2
        n_gpus = len(self.gpu_devices)
        if n_gpus > 0:
            self.resource_metrics['compute_utilization'] = total_util / n_gpus
            self.resource_metrics['memory_utilization'] = total_mem_util / n_gpus
            self.resource_metrics['total_utilization'] = (self.resource_metrics['compute_utilization'] + self.resource_metrics['memory_utilization']) / 2
        # Energy efficiency: rough utilization-per-watt proxy
        total_power = sum(g['power_draw'] for g in self.gpu_devices)
        if total_power > 0:
            self.resource_metrics['energy_efficiency'] = (self.resource_metrics['compute_utilization'] * 100) / total_power

    def get_system_status(self) -> Dict[str, Any]:
        """Get current per-device status and fleet metrics as a JSON-safe dict."""
        with self.lock:
            self._update_metrics()
            devices_info = []
            for gpu in self.gpu_devices:
                pool = self.gpu_memory_pools[gpu['id']]
                devices_info.append({
                    'id': gpu['id'],
                    'name': gpu['name'],
                    'utilization': round(gpu['utilization'] * 100, 2),
                    'memory_used_gb': round((gpu['total_memory'] - gpu['free_memory']) / (1024**3), 2),
                    'memory_total_gb': round(gpu['total_memory'] / (1024**3), 2),
                    'temperature_c': round(gpu['temperature'], 1),
                    'power_draw_w': round(gpu['power_draw'], 1),
                    'status': gpu['status'],
                    'fragmentation': round(pool['fragmentation'] * 100, 2)
                })
            return {
                'timestamp': datetime.utcnow().isoformat(),
                'active_jobs': len(self.active_jobs),
                'metrics': {
                    'overall_utilization_pct': round(self.resource_metrics['total_utilization'] * 100, 2),
                    'compute_utilization_pct': round(self.resource_metrics['compute_utilization'] * 100, 2),
                    'memory_utilization_pct': round(self.resource_metrics['memory_utilization'] * 100, 2),
                    'energy_efficiency_score': round(self.resource_metrics['energy_efficiency'], 4),
                    'jobs_processed_total': self.resource_metrics['jobs_processed']
                },
                'devices': devices_info
            }
# Example usage function
async def optimize_marketplace_batch(jobs: List[Dict[str, Any]]):
    """Run a batch of marketplace jobs through a fresh optimizer.

    Jobs are allocated sequentially (each allocation sees the state left by
    the previous one). Returns a tuple of (per-job allocation results,
    final system status snapshot).
    """
    gpu_optimizer = MarketplaceGPUOptimizer()
    outcomes = [await gpu_optimizer.optimize_resource_allocation(spec) for spec in jobs]
    return outcomes, gpu_optimizer.get_system_status()

View File

@@ -0,0 +1,468 @@
"""
Distributed Agent Processing Framework
Implements a scalable, fault-tolerant framework for distributed AI agent tasks across the AITBC network.
"""
import asyncio
import uuid
import time
import logging
import json
import hashlib
from typing import Dict, List, Optional, Any, Callable, Awaitable
from datetime import datetime
from enum import Enum
logger = logging.getLogger(__name__)
class TaskStatus(str, Enum):
    """Lifecycle states of a distributed task (str-valued so they serialize as plain JSON strings)."""
    PENDING = "pending"        # submitted, waiting in the priority queue
    SCHEDULED = "scheduled"    # assigned to a worker, not yet executing
    PROCESSING = "processing"  # currently executing on a worker
    COMPLETED = "completed"    # finished successfully; result available
    FAILED = "failed"          # failed permanently (retries exhausted)
    TIMEOUT = "timeout"        # exceeded its timeout_ms budget
    RETRYING = "retrying"      # attempt failed; re-queued for another try
class WorkerStatus(str, Enum):
    """Health/load states of a worker node (str-valued for JSON serialization)."""
    IDLE = "idle"              # no active tasks
    BUSY = "busy"              # running tasks, below capacity
    OFFLINE = "offline"        # missed heartbeats; excluded from scheduling
    OVERLOADED = "overloaded"  # at capacity or high CPU load; excluded from scheduling
class DistributedTask:
    """A single unit of agent work tracked through the distributed framework.

    Holds the task's payload, scheduling parameters, lifecycle timestamps,
    and a content hash used for result caching/deduplication.
    """

    def __init__(
        self,
        task_id: str,
        agent_id: str,
        payload: Dict[str, Any],
        priority: int = 1,
        requires_gpu: bool = False,
        timeout_ms: int = 30000,
        max_retries: int = 3
    ):
        # Falsy task_id (e.g. "" or None) gets an auto-generated identifier.
        self.task_id = task_id or f"dt_{uuid.uuid4().hex[:12]}"
        self.agent_id = agent_id
        self.payload = payload
        self.priority = priority          # higher number = more urgent
        self.requires_gpu = requires_gpu  # restricts scheduling to GPU workers
        self.timeout_ms = timeout_ms
        self.max_retries = max_retries
        self.status = TaskStatus.PENDING
        # Lifecycle timestamps (time.time() seconds); None until reached.
        self.created_at = time.time()
        self.scheduled_at = None
        self.started_at = None
        self.completed_at = None
        self.assigned_worker_id = None    # worker currently holding the task
        self.result = None                # set on success
        self.error = None                 # set on failure/retry
        self.retries = 0                  # failed attempts so far
        # Calculate content hash for caching/deduplication: identical
        # payloads (key order ignored via sort_keys) share cached results.
        content = json.dumps(payload, sort_keys=True)
        self.content_hash = hashlib.sha256(content.encode()).hexdigest()
class WorkerNode:
    """Coordinator-side record of a registered worker node and its load/health."""

    def __init__(
        self,
        worker_id: str,
        capabilities: List[str],
        has_gpu: bool = False,
        max_concurrent_tasks: int = 4
    ):
        self.worker_id = worker_id
        self.capabilities = capabilities              # advertised capability tags
        self.has_gpu = has_gpu
        self.max_concurrent_tasks = max_concurrent_tasks
        self.status = WorkerStatus.IDLE
        self.active_tasks = []                        # task_ids currently assigned here
        self.last_heartbeat = time.time()             # used to detect offline workers
        self.total_completed = 0
        # 0.0 to 1.0, adjusted by the coordinator on success/failure; used
        # both for scheduling preference and simulated failure injection.
        self.performance_score = 1.0
class DistributedProcessingCoordinator:
    """
    Coordinates distributed task execution across available worker nodes.
    Implements advanced scheduling, fault tolerance, and load balancing.
    """

    def __init__(self):
        self.tasks: Dict[str, DistributedTask] = {}
        self.workers: Dict[str, WorkerNode] = {}
        # Entries are (queue_priority, created_at, task_id); lowest tuple first,
        # so created_at breaks ties FIFO within a priority.
        self.task_queue = asyncio.PriorityQueue()
        # Result cache (content_hash -> result)
        # NOTE(review): unbounded -- consider eviction for long-lived processes.
        self.result_cache: Dict[str, Any] = {}
        self.is_running = False
        self._scheduler_task = None
        self._monitor_task = None

    async def start(self):
        """Start the coordinator background tasks (idempotent)."""
        if self.is_running:
            return
        self.is_running = True
        self._scheduler_task = asyncio.create_task(self._scheduling_loop())
        self._monitor_task = asyncio.create_task(self._health_monitor_loop())
        logger.info("Distributed Processing Coordinator started")

    async def stop(self):
        """Stop the coordinator gracefully.

        Cancels the background loops; in-flight simulated executions are left
        to finish on their own.
        """
        self.is_running = False
        if self._scheduler_task:
            self._scheduler_task.cancel()
        if self._monitor_task:
            self._monitor_task.cancel()
        logger.info("Distributed Processing Coordinator stopped")

    def register_worker(self, worker_id: str, capabilities: List[str], has_gpu: bool = False, max_tasks: int = 4):
        """Register a new worker node in the cluster, or refresh an existing one."""
        if worker_id not in self.workers:
            self.workers[worker_id] = WorkerNode(worker_id, capabilities, has_gpu, max_tasks)
            logger.info(f"Registered new worker node: {worker_id} (GPU: {has_gpu})")
        else:
            # Update existing worker; re-registration also counts as liveness
            worker = self.workers[worker_id]
            worker.capabilities = capabilities
            worker.has_gpu = has_gpu
            worker.max_concurrent_tasks = max_tasks
            worker.last_heartbeat = time.time()
            if worker.status == WorkerStatus.OFFLINE:
                worker.status = WorkerStatus.IDLE

    def heartbeat(self, worker_id: str, metrics: Optional[Dict[str, Any]] = None):
        """Record a heartbeat from a worker node; unknown workers are ignored."""
        if worker_id in self.workers:
            worker = self.workers[worker_id]
            worker.last_heartbeat = time.time()
            # Update status based on metrics if provided
            if metrics:
                cpu_load = metrics.get('cpu_load', 0.0)
                if cpu_load > 0.9 or len(worker.active_tasks) >= worker.max_concurrent_tasks:
                    worker.status = WorkerStatus.OVERLOADED
                elif len(worker.active_tasks) > 0:
                    worker.status = WorkerStatus.BUSY
                else:
                    worker.status = WorkerStatus.IDLE

    async def submit_task(self, task: DistributedTask) -> str:
        """Submit a new task to the distributed framework; returns its task_id.

        Identical payloads (by content hash) are served from the result cache
        without ever touching the queue.
        """
        # Check cache first
        if task.content_hash in self.result_cache:
            task.status = TaskStatus.COMPLETED
            task.result = self.result_cache[task.content_hash]
            task.completed_at = time.time()
            self.tasks[task.task_id] = task
            logger.debug(f"Task {task.task_id} fulfilled from cache")
            return task.task_id
        self.tasks[task.task_id] = task
        # Priority Queue uses lowest number first, so we invert user priority
        queue_priority = 100 - min(task.priority, 100)
        await self.task_queue.put((queue_priority, task.created_at, task.task_id))
        logger.debug(f"Task {task.task_id} queued with priority {task.priority}")
        return task.task_id

    async def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the current status (and result or error) of a task, or None if unknown."""
        if task_id not in self.tasks:
            return None
        task = self.tasks[task_id]
        response = {
            'task_id': task.task_id,
            'status': task.status,
            'created_at': task.created_at
        }
        if task.status == TaskStatus.COMPLETED:
            response['result'] = task.result
            response['completed_at'] = task.completed_at
            # Cache hits never started; fall back to creation time for duration
            response['duration_ms'] = int((task.completed_at - (task.started_at or task.created_at)) * 1000)
        elif task.status in [TaskStatus.FAILED, TaskStatus.TIMEOUT]:
            response['error'] = str(task.error)
        if task.assigned_worker_id:
            response['worker_id'] = task.assigned_worker_id
        return response

    async def _scheduling_loop(self):
        """Background task that assigns queued tasks to available workers."""
        while self.is_running:
            try:
                # Poll instead of blocking forever so the loop notices shutdown
                if self.task_queue.empty():
                    await asyncio.sleep(0.1)
                    continue
                priority, _, task_id = await self.task_queue.get()
                if task_id not in self.tasks:
                    self.task_queue.task_done()
                    continue
                task = self.tasks[task_id]
                # Skip tasks resolved (cancelled/completed) while waiting in the queue
                if task.status != TaskStatus.PENDING and task.status != TaskStatus.RETRYING:
                    self.task_queue.task_done()
                    continue
                # Find best worker
                best_worker = self._find_best_worker(task)
                if best_worker:
                    await self._assign_task(task, best_worker)
                else:
                    # No worker available right now; requeue after a delay via a
                    # background task so this loop is not blocked.
                    asyncio.create_task(self._requeue_delayed(priority, task))
                self.task_queue.task_done()
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in scheduling loop: {e}")
                await asyncio.sleep(1.0)

    async def _requeue_delayed(self, priority: int, task: DistributedTask):
        """Put a task back in the queue after a short delay (if still schedulable)."""
        await asyncio.sleep(0.5)
        if self.is_running and task.status in [TaskStatus.PENDING, TaskStatus.RETRYING]:
            await self.task_queue.put((priority, task.created_at, task.task_id))

    def _find_best_worker(self, task: DistributedTask) -> Optional[WorkerNode]:
        """Find the optimal worker for a task based on requirements and load."""
        candidates = []
        for worker in self.workers.values():
            # Skip offline or overloaded workers
            if worker.status in [WorkerStatus.OFFLINE, WorkerStatus.OVERLOADED]:
                continue
            # Skip if worker is at capacity
            if len(worker.active_tasks) >= worker.max_concurrent_tasks:
                continue
            # Check GPU requirement
            if task.requires_gpu and not worker.has_gpu:
                continue
            # Required capability check could be added here
            # Score: historical performance, minus current load, minus a
            # penalty for occupying a GPU worker with CPU-only work.
            score = worker.performance_score * 100
            load_factor = len(worker.active_tasks) / worker.max_concurrent_tasks
            score -= (load_factor * 20)
            if worker.has_gpu and not task.requires_gpu:
                score -= 30
            candidates.append((score, worker))
        if not candidates:
            return None
        # Return worker with highest score
        candidates.sort(key=lambda x: x[0], reverse=True)
        return candidates[0][1]

    async def _assign_task(self, task: DistributedTask, worker: WorkerNode):
        """Assign a task to a specific worker and dispatch it asynchronously."""
        task.status = TaskStatus.SCHEDULED
        task.assigned_worker_id = worker.worker_id
        task.scheduled_at = time.time()
        worker.active_tasks.append(task.task_id)
        if len(worker.active_tasks) >= worker.max_concurrent_tasks:
            worker.status = WorkerStatus.OVERLOADED
        elif worker.status == WorkerStatus.IDLE:
            worker.status = WorkerStatus.BUSY
        logger.debug(f"Assigned task {task.task_id} to worker {worker.worker_id}")
        # In a real system, this would make an RPC/network call to the worker.
        # Here we simulate the network dispatch asynchronously.
        asyncio.create_task(self._simulate_worker_execution(task, worker))

    async def _simulate_worker_execution(self, task: DistributedTask, worker: WorkerNode):
        """Simulate the execution on the remote worker node."""
        task.status = TaskStatus.PROCESSING
        task.started_at = time.time()
        try:
            # Simulate processing time based on task complexity
            # Real implementation would await the actual RPC response
            complexity = task.payload.get('complexity', 1.0)
            base_time = 0.5
            if worker.has_gpu and task.requires_gpu:
                # GPU processes faster
                processing_time = base_time * complexity * 0.2
            else:
                processing_time = base_time * complexity
            # Simulate potential network/node failure: low-score workers fail
            # during ~1s out of every 10s wall-clock window.
            if worker.performance_score < 0.5 and time.time() % 10 < 1:
                raise ConnectionError("Worker node network failure")
            await asyncio.sleep(processing_time)
            # Success
            self.report_task_success(task.task_id, {"result_data": "simulated_success", "processed_by": worker.worker_id})
        except Exception as e:
            self.report_task_failure(task.task_id, str(e))

    def report_task_success(self, task_id: str, result: Any):
        """Called by a worker when a task completes successfully."""
        if task_id not in self.tasks:
            return
        task = self.tasks[task_id]
        if task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.TIMEOUT]:
            return  # Already finished (e.g. timed out before the result arrived)
        task.status = TaskStatus.COMPLETED
        task.result = result
        task.completed_at = time.time()
        # Cache the result for future deduplicated submissions
        self.result_cache[task.content_hash] = result
        # Update worker metrics
        if task.assigned_worker_id and task.assigned_worker_id in self.workers:
            worker = self.workers[task.assigned_worker_id]
            if task_id in worker.active_tasks:
                worker.active_tasks.remove(task_id)
            worker.total_completed += 1
            # Increase performance score slightly (max 1.0)
            worker.performance_score = min(1.0, worker.performance_score + 0.01)
            if len(worker.active_tasks) < worker.max_concurrent_tasks and worker.status == WorkerStatus.OVERLOADED:
                worker.status = WorkerStatus.BUSY
            if len(worker.active_tasks) == 0:
                worker.status = WorkerStatus.IDLE
        logger.info(f"Task {task_id} completed successfully")

    def report_task_failure(self, task_id: str, error: str):
        """Called when a task fails execution; requeues until max_retries is hit."""
        if task_id not in self.tasks:
            return
        task = self.tasks[task_id]
        # Update worker metrics
        if task.assigned_worker_id and task.assigned_worker_id in self.workers:
            worker = self.workers[task.assigned_worker_id]
            if task_id in worker.active_tasks:
                worker.active_tasks.remove(task_id)
            # Decrease performance score heavily on failure
            worker.performance_score = max(0.1, worker.performance_score - 0.05)
        # Handle retry logic
        if task.retries < task.max_retries:
            task.retries += 1
            task.status = TaskStatus.RETRYING
            task.assigned_worker_id = None
            task.error = f"Attempt {task.retries} failed: {error}"
            logger.warning(f"Task {task_id} failed, scheduling retry {task.retries}/{task.max_retries}")
            # Put back in queue with slightly lower priority
            queue_priority = (100 - min(task.priority, 100)) + (task.retries * 5)
            asyncio.create_task(self.task_queue.put((queue_priority, time.time(), task.task_id)))
        else:
            task.status = TaskStatus.FAILED
            task.error = f"Max retries exceeded. Final error: {error}"
            task.completed_at = time.time()
            logger.error(f"Task {task_id} failed permanently")

    async def _health_monitor_loop(self):
        """Background task that monitors worker health and task timeouts."""
        while self.is_running:
            try:
                current_time = time.time()
                # 1. Check worker health
                for worker_id, worker in self.workers.items():
                    # If no heartbeat for 60 seconds, mark offline
                    if current_time - worker.last_heartbeat > 60.0:
                        if worker.status != WorkerStatus.OFFLINE:
                            logger.warning(f"Worker {worker_id} went offline (missed heartbeats)")
                            worker.status = WorkerStatus.OFFLINE
                            # Fail over every task running on this worker.
                            # (fix) iterate over a snapshot: report_task_failure()
                            # removes entries from worker.active_tasks, and mutating
                            # the list mid-iteration silently skipped every other task.
                            for task_id in list(worker.active_tasks):
                                if task_id in self.tasks:
                                    self.report_task_failure(task_id, "Worker node disconnected")
                            worker.active_tasks.clear()
                # 2. Check task timeouts (snapshot: failure handlers run mid-scan)
                for task_id, task in list(self.tasks.items()):
                    if task.status in [TaskStatus.SCHEDULED, TaskStatus.PROCESSING]:
                        start_time = task.started_at or task.scheduled_at
                        if start_time and (current_time - start_time) * 1000 > task.timeout_ms:
                            logger.warning(f"Task {task_id} timed out")
                            self.report_task_failure(task_id, f"Execution timed out after {task.timeout_ms}ms")
                await asyncio.sleep(5.0)  # Check every 5 seconds
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in health monitor loop: {e}")
                await asyncio.sleep(5.0)

    def get_cluster_status(self) -> Dict[str, Any]:
        """Get the overall status of the distributed cluster."""
        total_workers = len(self.workers)
        active_workers = sum(1 for w in self.workers.values() if w.status != WorkerStatus.OFFLINE)
        gpu_workers = sum(1 for w in self.workers.values() if w.has_gpu and w.status != WorkerStatus.OFFLINE)
        pending_tasks = sum(1 for t in self.tasks.values() if t.status == TaskStatus.PENDING)
        processing_tasks = sum(1 for t in self.tasks.values() if t.status in [TaskStatus.SCHEDULED, TaskStatus.PROCESSING])
        completed_tasks = sum(1 for t in self.tasks.values() if t.status == TaskStatus.COMPLETED)
        failed_tasks = sum(1 for t in self.tasks.values() if t.status in [TaskStatus.FAILED, TaskStatus.TIMEOUT])
        # Cluster utilization = task slots in use / total slots on live nodes
        total_capacity = sum(w.max_concurrent_tasks for w in self.workers.values() if w.status != WorkerStatus.OFFLINE)
        current_load = sum(len(w.active_tasks) for w in self.workers.values() if w.status != WorkerStatus.OFFLINE)
        utilization = (current_load / total_capacity * 100) if total_capacity > 0 else 0
        return {
            "cluster_health": "healthy" if active_workers > 0 else "offline",
            "nodes": {
                "total": total_workers,
                "active": active_workers,
                "with_gpu": gpu_workers
            },
            "tasks": {
                "pending": pending_tasks,
                "processing": processing_tasks,
                "completed": completed_tasks,
                "failed": failed_tasks
            },
            "performance": {
                "utilization_percent": round(utilization, 2),
                "cache_size": len(self.result_cache)
            },
            "timestamp": datetime.utcnow().isoformat()
        }

View File

@@ -0,0 +1,246 @@
"""
Marketplace Caching & Optimization Service
Implements advanced caching, indexing, and data optimization for the AITBC marketplace.
"""
import json
import time
import hashlib
import logging
from typing import Dict, List, Optional, Any, Union, Set
from collections import OrderedDict
from datetime import datetime
import redis.asyncio as redis
logger = logging.getLogger(__name__)
class LFU_LRU_Cache:
    """Hybrid Least-Frequently/Least-Recently Used cache for in-memory optimization.

    Keys are grouped into per-frequency ``OrderedDict`` buckets; within a
    bucket, insertion order provides LRU ordering.  Eviction therefore removes
    the least-recently-used key among the least-frequently-used ones, in O(1).
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = {}             # key -> value
        self.frequencies = {}       # key -> access count
        self.frequency_lists = {}   # access count -> OrderedDict of keys (LRU order)
        self.min_freq = 0           # smallest access count currently present

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, bumping its frequency; None on miss."""
        if key not in self.cache:
            return None
        freq = self.frequencies[key]
        val = self.cache[key]
        # Move key out of its current frequency bucket.
        # BUGFIX: OrderedDict has no .remove(); delete by key instead
        # (the original `self.frequency_lists[freq].remove(key)` raised
        # AttributeError on every cache hit).
        del self.frequency_lists[freq][key]
        if not self.frequency_lists[freq]:
            # Drop the empty bucket and advance min_freq if it pointed here.
            del self.frequency_lists[freq]
            if self.min_freq == freq:
                self.min_freq += 1
        # Add the key to the next frequency bucket (created on demand).
        new_freq = freq + 1
        self.frequencies[key] = new_freq
        if new_freq not in self.frequency_lists:
            self.frequency_lists[new_freq] = OrderedDict()
        self.frequency_lists[new_freq][key] = None
        return val

    def put(self, key: str, value: Any):
        """Insert or update *key*; evicts the LFU (tie: LRU) entry when full."""
        if self.capacity == 0:
            return
        if key in self.cache:
            self.cache[key] = value
            self.get(key)  # bump frequency / recency for the updated key
            return
        if len(self.cache) >= self.capacity:
            # Evict least frequently used item (if tie, least recently used)
            evict_key, _ = self.frequency_lists[self.min_freq].popitem(last=False)
            del self.cache[evict_key]
            del self.frequencies[evict_key]
            if not self.frequency_lists[self.min_freq]:
                del self.frequency_lists[self.min_freq]
        # Add new item; a fresh key always enters at frequency 1.
        self.cache[key] = value
        self.frequencies[key] = 1
        self.min_freq = 1
        if 1 not in self.frequency_lists:
            self.frequency_lists[1] = OrderedDict()
        self.frequency_lists[1][key] = None
class MarketplaceDataOptimizer:
    """Advanced optimization engine for marketplace data access.

    Implements a two-tier read cache — L1 in-process (LFU/LRU) and L2 Redis —
    plus response-shaping helpers for hot marketplace endpoints.  Every cache
    failure degrades gracefully: L2 errors fall back to L1 / the caller.
    """

    def __init__(self, redis_url: str = "redis://localhost:6379/0"):
        self.redis_url = redis_url
        self.redis_client = None
        # Two-tier cache: Fast L1 (Memory), Slower L2 (Redis)
        self.l1_cache = LFU_LRU_Cache(capacity=1000)
        self.is_connected = False
        # Default TTLs in seconds per data namespace; more volatile data expires faster.
        self.ttls = {
            'order_book': 5,         # Very dynamic, 5 seconds
            'provider_status': 15,   # 15 seconds
            'market_stats': 60,      # 1 minute
            'historical_data': 3600  # 1 hour
        }

    async def connect(self):
        """Establish connection to the Redis L2 cache.

        On failure the optimizer keeps working with the L1 memory cache only.
        """
        try:
            self.redis_client = redis.from_url(self.redis_url, decode_responses=True)
            await self.redis_client.ping()
            self.is_connected = True
            logger.info("Connected to Redis L2 cache")
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}. Falling back to L1 cache only.")
            self.is_connected = False

    async def disconnect(self):
        """Close the Redis connection, if any."""
        if self.redis_client:
            await self.redis_client.close()
        self.is_connected = False

    def _generate_cache_key(self, namespace: str, params: Dict[str, Any]) -> str:
        """Generate a deterministic cache key from parameters.

        Params are serialized with sorted keys so logically-equal dicts map to
        the same key.  MD5 is used purely as a compact non-cryptographic hash.
        """
        param_str = json.dumps(params, sort_keys=True)
        param_hash = hashlib.md5(param_str.encode()).hexdigest()
        return f"mkpt:{namespace}:{param_hash}"

    async def get_cached_data(self, namespace: str, params: Dict[str, Any]) -> Optional[Any]:
        """Retrieve data from the multi-tier cache, or None on a full miss."""
        key = self._generate_cache_key(namespace, params)
        # 1. Try L1 Memory Cache (fastest). Expired entries are skipped but
        #    left in place; they are overwritten on backfill or evicted by
        #    the LFU/LRU policy.
        l1_result = self.l1_cache.get(key)
        if l1_result is not None:
            if l1_result['expires_at'] > time.time():
                logger.debug(f"L1 Cache hit for {key}")
                return l1_result['data']
        # 2. Try L2 Redis Cache (TTL handled server-side by Redis).
        if self.is_connected:
            try:
                l2_result_str = await self.redis_client.get(key)
                if l2_result_str:
                    logger.debug(f"L2 Cache hit for {key}")
                    data = json.loads(l2_result_str)
                    # Backfill L1 so subsequent reads skip the network hop.
                    ttl = self.ttls.get(namespace, 60)
                    self.l1_cache.put(key, {
                        'data': data,
                        'expires_at': time.time() + min(ttl, 10)  # L1 expires sooner than L2
                    })
                    return data
            except Exception as e:
                logger.warning(f"Redis get failed: {e}")
        return None

    async def set_cached_data(self, namespace: str, params: Dict[str, Any], data: Any, custom_ttl: Optional[int] = None):
        """Store *data* in both cache tiers.

        ``custom_ttl`` (seconds) overrides the namespace default when given.
        """
        key = self._generate_cache_key(namespace, params)
        ttl = custom_ttl or self.ttls.get(namespace, 60)
        # 1. Update L1 Cache
        self.l1_cache.put(key, {
            'data': data,
            'expires_at': time.time() + ttl
        })
        # 2. Update L2 Redis Cache; failures are logged, never raised.
        if self.is_connected:
            try:
                await self.redis_client.setex(
                    key,
                    ttl,
                    json.dumps(data)
                )
            except Exception as e:
                logger.warning(f"Redis set failed: {e}")

    async def invalidate_namespace(self, namespace: str):
        """Invalidate all L2 cached items for *namespace*.

        Uses cursor-based SCAN (non-blocking) rather than KEYS.  L1 entries
        are not scanned; they expire naturally or get evicted.
        """
        if self.is_connected:
            try:
                cursor = 0
                pattern = f"mkpt:{namespace}:*"
                while True:
                    cursor, keys = await self.redis_client.scan(cursor=cursor, match=pattern, count=100)
                    if keys:
                        await self.redis_client.delete(*keys)
                    if cursor == 0:
                        break
                logger.info(f"Invalidated L2 cache namespace: {namespace}")
            except Exception as e:
                logger.error(f"Failed to invalidate namespace {namespace}: {e}")

    async def precompute_market_stats(self, db_session) -> Dict[str, Any]:
        """Precompute expensive market statistics and cache them.

        Intended to run periodically (e.g. via Celery Beat).  The DB
        aggregation is currently simulated with fixed values.
        """
        start_time = time.time()
        # Simulated expensive DB aggregations
        # In reality: SELECT AVG(price), SUM(volume) FROM trades WHERE created_at > NOW() - 24h
        stats = {
            "24h_volume": 1250000.50,
            "active_providers": 450,
            "average_price_per_tflop": 0.005,
            "network_utilization": 0.76,
            "computed_at": datetime.utcnow().isoformat(),
            "computation_time_ms": int((time.time() - start_time) * 1000)
        }
        # Cache with a longer TTL than live stats since recomputation is costly.
        await self.set_cached_data('market_stats', {'period': '24h'}, stats, custom_ttl=300)
        return stats

    def optimize_order_book_response(self, raw_orders: List[Dict], depth: int = 50) -> Dict[str, List]:
        """Optimize the raw order book for client delivery.

        Aggregates amounts at 4-decimal price levels to shrink the payload,
        truncates to *depth* levels per side, and returns ``[price, qty]``
        pairs: bids sorted descending, asks ascending.
        """
        buy_orders = [o for o in raw_orders if o['type'] == 'buy']
        sell_orders = [o for o in raw_orders if o['type'] == 'sell']
        # Aggregate by price level to reduce payload size
        agg_buys: Dict[float, float] = {}
        for order in buy_orders:
            price = round(order['price'], 4)
            agg_buys[price] = agg_buys.get(price, 0) + order['amount']
        agg_sells: Dict[float, float] = {}
        for order in sell_orders:
            price = round(order['price'], 4)
            agg_sells[price] = agg_sells.get(price, 0) + order['amount']
        # Format and sort: best bid (highest) and best ask (lowest) first.
        formatted_buys = [[p, q] for p, q in sorted(agg_buys.items(), reverse=True)[:depth]]
        formatted_sells = [[p, q] for p, q in sorted(agg_sells.items())[:depth]]
        return {
            "bids": formatted_buys,
            "asks": formatted_sells,
            "timestamp": time.time()
        }

View File

@@ -0,0 +1,236 @@
"""
Marketplace Real-time Performance Monitor
Implements comprehensive real-time monitoring and analytics for the AITBC marketplace.
"""
import asyncio
import collections
import logging
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
class TimeSeriesData:
    """Efficient in-memory time series data structure for real-time metrics.

    Two parallel deques (timestamps/values) share a ``maxlen`` so the oldest
    points fall off automatically; window queries are O(n) linear scans.
    """

    def __init__(self, max_points: int = 3600):  # Default 1 hour of second-level data
        self.max_points = max_points
        self.timestamps = collections.deque(maxlen=max_points)
        self.values = collections.deque(maxlen=max_points)

    def add(self, value: float, timestamp: Optional[float] = None):
        """Append a sample; *timestamp* defaults to the current time.

        BUGFIX: use an explicit None check — the previous ``timestamp or
        time.time()`` silently replaced a legitimate ``timestamp=0.0`` (epoch)
        with the current time.
        """
        self.timestamps.append(time.time() if timestamp is None else timestamp)
        self.values.append(value)

    def get_latest(self) -> Optional[float]:
        """Most recent value, or None if no samples exist."""
        return self.values[-1] if self.values else None

    def get_average(self, window_seconds: int = 60) -> float:
        """Mean of samples within the trailing window; 0.0 if none."""
        if not self.values:
            return 0.0
        cutoff = time.time() - window_seconds
        valid_values = [v for t, v in zip(self.timestamps, self.values) if t >= cutoff]
        return sum(valid_values) / len(valid_values) if valid_values else 0.0

    def get_percentile(self, percentile: float, window_seconds: int = 60) -> float:
        """Nearest-rank percentile (0.0-1.0) within the trailing window; 0.0 if none."""
        if not self.values:
            return 0.0
        cutoff = time.time() - window_seconds
        valid_values = sorted([v for t, v in zip(self.timestamps, self.values) if t >= cutoff])
        if not valid_values:
            return 0.0
        # Clamp the rank index into the valid range for edge percentiles (0.0 / 1.0).
        idx = int(len(valid_values) * percentile)
        idx = min(max(idx, 0), len(valid_values) - 1)
        return valid_values[idx]
class MarketplaceMonitor:
    """Real-time performance monitoring system for the marketplace.

    Callers feed raw events in via record_api_call()/record_trade()/
    update_resource_metrics(); a background task aggregates them into
    per-second rates every tick and evaluates alert thresholds.
    """
    def __init__(self):
        # API Metrics
        self.api_latency_ms = TimeSeriesData()
        self.api_requests_per_sec = TimeSeriesData()
        self.api_error_rate = TimeSeriesData()
        # Trading Metrics
        self.order_matching_time_ms = TimeSeriesData()
        self.trades_per_sec = TimeSeriesData()
        self.active_orders = TimeSeriesData()
        # Resource Metrics
        self.gpu_utilization_pct = TimeSeriesData()
        self.network_bandwidth_mbps = TimeSeriesData()
        self.active_providers = TimeSeriesData()
        # Internal counters, reset each ~1s tick by _metric_tick_loop
        self._request_counter = 0
        self._error_counter = 0
        self._trade_counter = 0
        self._last_tick = time.time()
        self.is_running = False
        self._monitor_task = None
        # Alert thresholds (units encoded in the key names)
        self.alert_thresholds = {
            'api_latency_p95_ms': 500.0,
            'api_error_rate_pct': 5.0,
            'gpu_utilization_pct': 90.0,
            'matching_time_ms': 100.0
        }
        self.active_alerts = []
    async def start(self):
        """Start the background aggregation loop (idempotent)."""
        if self.is_running:
            return
        self.is_running = True
        self._monitor_task = asyncio.create_task(self._metric_tick_loop())
        logger.info("Marketplace Monitor started")
    async def stop(self):
        """Stop the background aggregation loop."""
        self.is_running = False
        if self._monitor_task:
            self._monitor_task.cancel()
        logger.info("Marketplace Monitor stopped")
    def record_api_call(self, latency_ms: float, is_error: bool = False):
        """Record an API request for monitoring"""
        self.api_latency_ms.add(latency_ms)
        self._request_counter += 1
        if is_error:
            self._error_counter += 1
    def record_trade(self, matching_time_ms: float):
        """Record a successful trade match"""
        self.order_matching_time_ms.add(matching_time_ms)
        self._trade_counter += 1
    def update_resource_metrics(self, gpu_util: float, bandwidth: float, providers: int, orders: int):
        """Update system resource metrics"""
        self.gpu_utilization_pct.add(gpu_util)
        self.network_bandwidth_mbps.add(bandwidth)
        self.active_providers.add(providers)
        self.active_orders.add(orders)
    async def _metric_tick_loop(self):
        """Background task that aggregates metrics every second.

        Converts the raw event counters into per-second rates, resets
        them, and re-evaluates alerts.  Counter reset and _last_tick
        update must stay together so rates use a consistent window.
        """
        while self.is_running:
            try:
                now = time.time()
                elapsed = now - self._last_tick
                if elapsed >= 1.0:
                    # Calculate rates over the actual elapsed window (may be >1s)
                    req_per_sec = self._request_counter / elapsed
                    trades_per_sec = self._trade_counter / elapsed
                    # max(1, ...) guards division by zero on an idle tick
                    error_rate = (self._error_counter / max(1, self._request_counter)) * 100
                    # Store metrics
                    self.api_requests_per_sec.add(req_per_sec)
                    self.trades_per_sec.add(trades_per_sec)
                    self.api_error_rate.add(error_rate)
                    # Reset counters
                    self._request_counter = 0
                    self._error_counter = 0
                    self._trade_counter = 0
                    self._last_tick = now
                    # Evaluate alerts
                    self._evaluate_alerts()
                # NOTE(review): the argument can go negative if processing took
                # >1s; asyncio.sleep treats that as an immediate yield.
                await asyncio.sleep(1.0 - (time.time() - now))  # Sleep for remainder of second
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in monitor tick loop: {e}")
                await asyncio.sleep(1.0)
    def _evaluate_alerts(self):
        """Check metrics against thresholds and generate alerts.

        Rebuilds self.active_alerts from scratch each tick, so alerts
        auto-clear as soon as the underlying metric recovers.
        """
        current_alerts = []
        # API Latency Alert (p95 over the last 60s)
        p95_latency = self.api_latency_ms.get_percentile(0.95, window_seconds=60)
        if p95_latency > self.alert_thresholds['api_latency_p95_ms']:
            current_alerts.append({
                'id': f"alert_latency_{int(time.time())}",
                # Escalate to 'high' when latency is double the threshold
                'severity': 'high' if p95_latency > self.alert_thresholds['api_latency_p95_ms'] * 2 else 'medium',
                'metric': 'api_latency',
                'value': p95_latency,
                'threshold': self.alert_thresholds['api_latency_p95_ms'],
                'message': f"High API Latency (p95): {p95_latency:.2f}ms",
                'timestamp': datetime.utcnow().isoformat()
            })
        # Error Rate Alert (60s average)
        avg_error_rate = self.api_error_rate.get_average(window_seconds=60)
        if avg_error_rate > self.alert_thresholds['api_error_rate_pct']:
            current_alerts.append({
                'id': f"alert_error_{int(time.time())}",
                'severity': 'critical',
                'metric': 'error_rate',
                'value': avg_error_rate,
                'threshold': self.alert_thresholds['api_error_rate_pct'],
                'message': f"High API Error Rate: {avg_error_rate:.2f}%",
                'timestamp': datetime.utcnow().isoformat()
            })
        # Matching Time Alert (60s average)
        avg_matching = self.order_matching_time_ms.get_average(window_seconds=60)
        if avg_matching > self.alert_thresholds['matching_time_ms']:
            current_alerts.append({
                'id': f"alert_matching_{int(time.time())}",
                'severity': 'medium',
                'metric': 'matching_time',
                'value': avg_matching,
                'threshold': self.alert_thresholds['matching_time_ms'],
                'message': f"Slow Order Matching: {avg_matching:.2f}ms",
                'timestamp': datetime.utcnow().isoformat()
            })
        self.active_alerts = current_alerts
        if current_alerts:
            # In a real system, this would trigger webhooks, Slack/Discord messages, etc.
            for alert in current_alerts:
                if alert['severity'] in ['high', 'critical']:
                    logger.warning(f"MARKETPLACE ALERT: {alert['message']}")
    def get_realtime_dashboard_data(self) -> Dict[str, Any]:
        """Get aggregated data formatted for the frontend dashboard.

        Overall status is 'degraded' while any high/critical alert is
        active; latest values fall back to 0 when no samples exist yet.
        """
        return {
            'status': 'degraded' if any(a['severity'] in ['high', 'critical'] for a in self.active_alerts) else 'healthy',
            'timestamp': datetime.utcnow().isoformat(),
            'current_metrics': {
                'api': {
                    'rps': round(self.api_requests_per_sec.get_latest() or 0, 2),
                    'latency_p50_ms': round(self.api_latency_ms.get_percentile(0.50, 60), 2),
                    'latency_p95_ms': round(self.api_latency_ms.get_percentile(0.95, 60), 2),
                    'error_rate_pct': round(self.api_error_rate.get_average(60), 2)
                },
                'trading': {
                    'tps': round(self.trades_per_sec.get_latest() or 0, 2),
                    'matching_time_ms': round(self.order_matching_time_ms.get_average(60), 2),
                    'active_orders': int(self.active_orders.get_latest() or 0)
                },
                'network': {
                    'active_providers': int(self.active_providers.get_latest() or 0),
                    'gpu_utilization_pct': round(self.gpu_utilization_pct.get_latest() or 0, 2),
                    'bandwidth_mbps': round(self.network_bandwidth_mbps.get_latest() or 0, 2)
                }
            },
            'alerts': self.active_alerts
        }
# Module-level singleton: import and share this `monitor` instance so all
# handlers aggregate into one set of metrics (do not construct new ones).
monitor = MarketplaceMonitor()

View File

@@ -0,0 +1,265 @@
"""
Marketplace Adaptive Resource Scaler
Implements predictive and reactive auto-scaling of marketplace resources based on demand.
"""
import time
import asyncio
import logging
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime, timedelta
import math
logger = logging.getLogger(__name__)
class ScalingPolicy:
    """Tunable parameters governing auto-scaling behavior.

    Defaults: fleet of 2-100 nodes targeting 75% utilization, scaling up
    above 85%, scaling down below 40%, with a 5-minute cooldown between
    actions and predictive scaling enabled.
    """
    def __init__(
        self,
        min_nodes: int = 2,
        max_nodes: int = 100,
        target_utilization: float = 0.75,
        scale_up_threshold: float = 0.85,
        scale_down_threshold: float = 0.40,
        cooldown_period_sec: int = 300,  # 5 minutes between scaling actions
        predictive_scaling: bool = True
    ):
        # Fleet size bounds
        self.min_nodes = min_nodes
        self.max_nodes = max_nodes
        # Utilization targets and triggers
        self.target_utilization = target_utilization
        self.scale_up_threshold = scale_up_threshold
        self.scale_down_threshold = scale_down_threshold
        # Behavioral knobs
        self.cooldown_period_sec = cooldown_period_sec
        self.predictive_scaling = predictive_scaling
class ResourceScaler:
    """Adaptive resource scaling engine for the AITBC marketplace.

    Combines reactive scaling (current utilization / queue depth) with
    predictive scaling based on an hour-of-week demand profile.  The
    metric sources here are simulated; in production they would come
    from the monitor/coordinator, and _execute_scaling would call the
    actual infrastructure APIs.
    """
    def __init__(self, policy: Optional[ScalingPolicy] = None):
        self.policy = policy or ScalingPolicy()
        # Current state
        self.current_nodes = self.policy.min_nodes
        self.active_gpu_nodes = 0
        self.active_cpu_nodes = self.policy.min_nodes
        self.last_scaling_action_time = 0
        self.scaling_history = []
        # Historical demand tracking for predictive scaling
        # Format: hour_of_week (0-167) -> avg_utilization
        self.historical_demand = {}
        self.is_running = False
        self._scaler_task = None
    async def start(self):
        """Start the background scaling loop (idempotent)."""
        if self.is_running:
            return
        self.is_running = True
        self._scaler_task = asyncio.create_task(self._scaling_loop())
        logger.info(f"Resource Scaler started (Min: {self.policy.min_nodes}, Max: {self.policy.max_nodes})")
    async def stop(self):
        """Stop the background scaling loop."""
        self.is_running = False
        if self._scaler_task:
            self._scaler_task.cancel()
        logger.info("Resource Scaler stopped")
    def update_historical_demand(self, utilization: float):
        """Update historical data for predictive scaling"""
        now = datetime.utcnow()
        # Bucket by hour-of-week (0-167) to capture weekly seasonality
        hour_of_week = now.weekday() * 24 + now.hour
        if hour_of_week not in self.historical_demand:
            self.historical_demand[hour_of_week] = utilization
        else:
            # Exponential moving average (favor recent data)
            current_avg = self.historical_demand[hour_of_week]
            self.historical_demand[hour_of_week] = (current_avg * 0.9) + (utilization * 0.1)
    def _predict_demand(self, lookahead_hours: int = 1) -> float:
        """Predict expected utilization based on historical patterns.

        Returns 0.0 when predictive scaling is disabled or no history
        exists yet (callers treat 0.0 as "no prediction").
        """
        if not self.policy.predictive_scaling or not self.historical_demand:
            return 0.0
        now = datetime.utcnow()
        # Wrap around the 168-hour week
        target_hour = (now.weekday() * 24 + now.hour + lookahead_hours) % 168
        # If we have exact data for that hour
        if target_hour in self.historical_demand:
            return self.historical_demand[target_hour]
        # Find nearest available data points
        available_hours = sorted(self.historical_demand.keys())
        if not available_hours:
            return 0.0
        # Simplistic interpolation: fall back to the global average
        return sum(self.historical_demand.values()) / len(self.historical_demand)
    async def _scaling_loop(self):
        """Background task that evaluates scaling rules periodically"""
        while self.is_running:
            try:
                # In a real system, we'd fetch this from the Monitor or Coordinator
                # Here we simulate fetching current metrics
                current_utilization = self._get_current_utilization()
                current_queue_depth = self._get_queue_depth()
                self.update_historical_demand(current_utilization)
                await self.evaluate_scaling(current_utilization, current_queue_depth)
                # Check every 10 seconds
                await asyncio.sleep(10.0)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in scaling loop: {e}")
                await asyncio.sleep(10.0)
    async def evaluate_scaling(self, current_utilization: float, queue_depth: int) -> Optional[Dict[str, Any]]:
        """Evaluate if scaling action is needed and execute if necessary.

        Returns the scaling-history record when an action was taken,
        otherwise None (cooldown active, or no change required).
        Branch priority: reactive scale-up > predictive scale-up >
        scale-down.
        """
        now = time.time()
        # Check cooldown
        if now - self.last_scaling_action_time < self.policy.cooldown_period_sec:
            return None
        predicted_utilization = self._predict_demand()
        # Determine target node count
        target_nodes = self.current_nodes
        action = None
        reason = ""
        # Scale UP conditions
        if current_utilization > self.policy.scale_up_threshold or queue_depth > self.current_nodes * 5:
            # Reactive scale up: size the increase proportionally to how far
            # we are above the target utilization
            desired_increase = math.ceil(self.current_nodes * (current_utilization / self.policy.target_utilization - 1.0))
            # Ensure we add at least 1, but bounded by queue depth and max_nodes
            nodes_to_add = max(1, min(desired_increase, max(1, queue_depth // 2)))
            target_nodes = min(self.policy.max_nodes, self.current_nodes + nodes_to_add)
            if target_nodes > self.current_nodes:
                action = "scale_up"
                reason = f"High utilization ({current_utilization*100:.1f}%) or queue depth ({queue_depth})"
        elif self.policy.predictive_scaling and predicted_utilization > self.policy.scale_up_threshold:
            # Predictive scale up (proactive)
            # Add nodes more conservatively for predictive scaling
            target_nodes = min(self.policy.max_nodes, self.current_nodes + 1)
            if target_nodes > self.current_nodes:
                action = "scale_up"
                reason = f"Predictive scaling (expected {predicted_utilization*100:.1f}% util)"
        # Scale DOWN conditions
        elif current_utilization < self.policy.scale_down_threshold and queue_depth == 0:
            # Only scale down if predicted utilization is also low
            if not self.policy.predictive_scaling or predicted_utilization < self.policy.target_utilization:
                # Remove nodes conservatively (20% of fleet, at least 1)
                nodes_to_remove = max(1, int(self.current_nodes * 0.2))
                target_nodes = max(self.policy.min_nodes, self.current_nodes - nodes_to_remove)
                if target_nodes < self.current_nodes:
                    action = "scale_down"
                    reason = f"Low utilization ({current_utilization*100:.1f}%)"
        # Execute scaling if needed
        if action and target_nodes != self.current_nodes:
            diff = abs(target_nodes - self.current_nodes)
            result = await self._execute_scaling(action, diff, target_nodes)
            record = {
                "timestamp": datetime.utcnow().isoformat(),
                "action": action,
                "nodes_changed": diff,
                "new_total": target_nodes,
                "reason": reason,
                "metrics_at_time": {
                    "utilization": current_utilization,
                    "queue_depth": queue_depth,
                    "predicted_utilization": predicted_utilization
                }
            }
            self.scaling_history.append(record)
            # Keep history manageable
            if len(self.scaling_history) > 1000:
                self.scaling_history = self.scaling_history[-1000:]
            # State updates happen only after _execute_scaling succeeds/returns
            self.last_scaling_action_time = now
            self.current_nodes = target_nodes
            logger.info(f"Auto-scaler: {action.upper()} to {target_nodes} nodes. Reason: {reason}")
            return record
        return None
    async def _execute_scaling(self, action: str, count: int, new_total: int) -> bool:
        """Execute the actual scaling action (e.g. interacting with Kubernetes/Docker/Cloud provider)"""
        # In this implementation, we simulate the scaling delay
        # In production, this would call cloud APIs (AWS AutoScaling, K8s Scale, etc.)
        logger.debug(f"Executing {action} by {count} nodes...")
        # Simulate API delay
        await asyncio.sleep(2.0)
        if action == "scale_up":
            # Simulate provisioning new instances
            # We assume a mix of CPU and GPU instances based on demand
            new_gpus = count // 2
            new_cpus = count - new_gpus
            self.active_gpu_nodes += new_gpus
            self.active_cpu_nodes += new_cpus
        elif action == "scale_down":
            # Simulate de-provisioning
            # Prefer removing CPU nodes first if we have GPU ones
            remove_cpus = min(count, max(0, self.active_cpu_nodes - self.policy.min_nodes))
            remove_gpus = count - remove_cpus
            self.active_cpu_nodes -= remove_cpus
            self.active_gpu_nodes = max(0, self.active_gpu_nodes - remove_gpus)
        return True
    # --- Simulation helpers ---
    def _get_current_utilization(self) -> float:
        """Simulate getting current cluster utilization"""
        # In reality, fetch from MarketplaceMonitor or Coordinator
        import random
        # Base utilization with some noise, clamped to [0.1, 0.99]
        base = 0.6
        return max(0.1, min(0.99, base + random.uniform(-0.2, 0.3)))
    def _get_queue_depth(self) -> int:
        """Simulate getting current queue depth"""
        import random
        # ~20% chance of a deep queue to exercise the scale-up path
        if random.random() > 0.8:
            return random.randint(10, 50)
        return random.randint(0, 5)
    def get_status(self) -> Dict[str, Any]:
        """Get current scaler status"""
        return {
            "status": "running" if self.is_running else "stopped",
            "current_nodes": {
                "total": self.current_nodes,
                "cpu_nodes": self.active_cpu_nodes,
                "gpu_nodes": self.active_gpu_nodes
            },
            "policy": {
                "min_nodes": self.policy.min_nodes,
                "max_nodes": self.policy.max_nodes,
                "target_utilization": self.policy.target_utilization
            },
            "last_action": self.scaling_history[-1] if self.scaling_history else None,
            "prediction": {
                "next_hour_utilization_estimate": round(self._predict_demand(1), 3)
            }
        }