feat: add marketplace metrics, privacy features, and service registry endpoints
- Add Prometheus metrics for marketplace API throughput and error rates with new dashboard panels
- Implement confidential transaction models with encryption support and access control
- Add key management system with registration, rotation, and audit logging
- Create services and registry routers for service discovery and management
- Integrate ZK proof generation for privacy-preserving receipts
- Add metrics instru… [message truncated in source]
This commit is contained in:
215
apps/miner-node/plugins/whisper.py
Normal file
215
apps/miner-node/plugins/whisper.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Whisper speech recognition plugin
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Dict, Any, List
|
||||
import time
|
||||
|
||||
from .base import GPUPlugin, PluginResult
|
||||
from .exceptions import PluginExecutionError
|
||||
|
||||
|
||||
class WhisperPlugin(GPUPlugin):
    """Plugin for Whisper speech recognition.

    Accepts a request dict that names the audio either as a local path
    (``audio_file``) or as a URL to download (``audio_url``), runs the
    selected Whisper model on it, and returns the text, detected language
    and segments. Loaded models are kept in ``self._model_cache`` keyed by
    model name until ``cleanup()`` is called.
    """

    # Model names accepted by validate_request().
    _VALID_MODELS = ("tiny", "base", "small", "medium", "large", "large-v2", "large-v3")
    # Tasks accepted by validate_request().
    _VALID_TASKS = ("transcribe", "translate")
    # File extensions recognised when naming a downloaded temp file.
    _AUDIO_SUFFIXES = (".mp3", ".wav", ".m4a", ".flac")
    _DEFAULT_AUDIO_SUFFIX = ".mp3"
    # Passed to requests as ``timeout``: bounds connection setup and each
    # socket read, not the total download time.
    _DOWNLOAD_TIMEOUT_SECONDS = 60

    def __init__(self):
        """Set the plugin metadata and create an empty model cache."""
        super().__init__()
        self.service_id = "whisper"
        self.name = "Whisper Speech Recognition"
        self.version = "1.0.0"
        self.description = "Transcribe and translate audio files using OpenAI Whisper"
        self.capabilities = ["transcribe", "translate"]
        self._model_cache = {}

    def setup(self) -> None:
        """Initialize Whisper dependencies.

        Imports the ``whisper`` package (stored on ``self.whisper``) and
        checks that ``ffmpeg -version`` runs successfully.

        Raises:
            PluginExecutionError: If ``whisper`` cannot be imported or the
                ``ffmpeg`` executable is missing or exits with an error.
        """
        super().setup()

        # Check for whisper installation
        try:
            import whisper
        except ImportError as exc:
            raise PluginExecutionError(
                "Whisper not installed. Install with: pip install openai-whisper"
            ) from exc
        self.whisper = whisper

        # Check for ffmpeg
        import subprocess

        try:
            subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError) as exc:
            raise PluginExecutionError(
                "FFmpeg not found. Install FFmpeg for audio processing"
            ) from exc

    def validate_request(self, request: Dict[str, Any]) -> List[str]:
        """Validate Whisper request parameters.

        Args:
            request: Request dict. Recognised keys are ``audio_url`` /
                ``audio_file`` (at least one required), ``model``
                (default ``"base"``), ``task`` (default ``"transcribe"``)
                and the optional ``language``.

        Returns:
            A list of human-readable error messages; empty when the
            request is valid.
        """
        errors: List[str] = []

        # Check required parameters
        if "audio_url" not in request and "audio_file" not in request:
            errors.append("Either 'audio_url' or 'audio_file' must be provided")

        # Validate model
        if request.get("model", "base") not in self._VALID_MODELS:
            errors.append(
                f"Invalid model. Must be one of: {', '.join(self._VALID_MODELS)}"
            )

        # Validate task
        if request.get("task", "transcribe") not in self._VALID_TASKS:
            errors.append("Task must be 'transcribe' or 'translate'")

        # Validate language
        if "language" in request:
            language = request["language"]
            if not isinstance(language, str) or len(language) != 2:
                errors.append(
                    "Language must be a 2-letter language code (e.g., 'en', 'es')"
                )

        return errors

    def get_hardware_requirements(self) -> Dict[str, Any]:
        """Return the fixed hardware requirements dict for Whisper."""
        return {
            "gpu": "recommended",
            "vram_gb": 2,
            "ram_gb": 4,
            "storage_gb": 1,
        }

    async def execute(self, request: Dict[str, Any]) -> PluginResult:
        """Execute Whisper transcription or translation.

        Validates the request, loads (or reuses) the model, obtains the
        audio file and runs ``model.transcribe`` in the default executor so
        the event loop is not blocked.

        Args:
            request: Request dict; see ``validate_request`` for the keys.
                ``temperature`` (default ``0.0``) is forwarded unchanged.

        Returns:
            A ``PluginResult``. On success ``data`` holds ``text``,
            ``language`` and ``segments``, and ``metrics`` holds timing
            information. Any exception is converted into a failed result
            carrying ``str(exception)`` rather than being raised.
        """
        start_time = time.time()
        # Path of a file this plugin downloaded itself. Only such a file is
        # deleted afterwards; a caller-supplied ``audio_file`` never is.
        downloaded_path = None

        try:
            # Validate request
            errors = self.validate_request(request)
            if errors:
                return PluginResult(
                    success=False,
                    error=f"Validation failed: {'; '.join(errors)}"
                )

            # Get parameters
            model_name = request.get("model", "base")
            task = request.get("task", "transcribe")
            language = request.get("language")
            temperature = request.get("temperature", 0.0)

            # Load or get cached model
            model = await self._load_model(model_name)

            # Get audio file
            audio_path = await self._get_audio_file(request)
            if "audio_file" not in request:
                downloaded_path = audio_path

            # Transcribe. ``language`` is forwarded for both tasks so that a
            # caller-supplied source-language hint is honoured when
            # translating too; ``None`` leaves detection to Whisper.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: model.transcribe(
                    audio_path,
                    task=task,
                    language=language,
                    temperature=temperature,
                ),
            )

            execution_time = time.time() - start_time
            audio_duration = self._audio_duration(result)

            return PluginResult(
                success=True,
                data={
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments", []),
                },
                metrics={
                    "model": model_name,
                    "task": task,
                    "audio_duration": audio_duration,
                    "processing_time": execution_time,
                    # Seconds of audio handled per second of wall time; 0
                    # when either quantity is unknown or zero.
                    "real_time_factor": (
                        (audio_duration or 0) / execution_time
                        if execution_time > 0
                        else 0
                    ),
                },
                execution_time=execution_time
            )

        except Exception as e:
            return PluginResult(
                success=False,
                error=str(e),
                execution_time=time.time() - start_time
            )

        finally:
            # Clean up on every exit path so a failed transcription does not
            # leave the downloaded temp file behind.
            if downloaded_path is not None:
                self._remove_file(downloaded_path)

    @staticmethod
    def _audio_duration(result: Dict[str, Any]) -> Any:
        """Return the audio length in seconds for a transcription result.

        Uses ``result["duration"]`` when it is present and not ``None``;
        otherwise falls back to the ``end`` time of the last segment, which
        approximates the length by the end of the last recognised speech.
        Returns ``None`` when neither is available.
        """
        # NOTE(review): upstream openai-whisper results appear to carry only
        # "text", "segments" and "language" - confirm whether "duration" is
        # ever set by the models used here.
        duration = result.get("duration")
        if duration is not None:
            return duration

        segments = result.get("segments") or []
        if segments:
            return segments[-1].get("end")
        return None

    @staticmethod
    def _remove_file(path: str) -> None:
        """Delete *path*, ignoring the failure if it cannot be removed."""
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not turn an
            # otherwise finished request into an error.
            pass

    async def _load_model(self, model_name: str):
        """Load a Whisper model, reusing a cached instance when available.

        The load runs in the default executor so the event loop stays
        responsive while it is in progress.
        """
        if model_name not in self._model_cache:
            loop = asyncio.get_running_loop()
            self._model_cache[model_name] = await loop.run_in_executor(
                None, self.whisper.load_model, model_name
            )

        return self._model_cache[model_name]

    async def _get_audio_file(self, request: Dict[str, Any]) -> str:
        """Return a local path to the request's audio.

        ``audio_file`` is returned as given and takes precedence over
        ``audio_url``. Otherwise the URL is downloaded to a temporary file
        (in the default executor, because the download is blocking) and
        that file's path is returned; the caller is responsible for
        deleting it.
        """
        # NOTE(review): both values are used as supplied in the request -
        # confirm that callers are trusted or that paths/URLs are
        # restricted upstream.
        if "audio_file" in request:
            return request["audio_file"]

        # Download from URL
        audio_url = request["audio_url"]
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._download_audio, audio_url)

    def _download_audio(self, audio_url: str) -> str:
        """Stream *audio_url* into a new temp file and return its path.

        Blocking; intended to be run in an executor. The temp file is
        removed again if the request or the download fails.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-success HTTP status.
        """
        # Use requests to download
        import requests

        suffix = self._get_audio_suffix(audio_url)
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp, requests.get(
                audio_url,
                stream=True,
                timeout=self._DOWNLOAD_TIMEOUT_SECONDS,
            ) as response:
                response.raise_for_status()
                for chunk in response.iter_content(chunk_size=8192):
                    tmp.write(chunk)
        except Exception:
            self._remove_file(tmp.name)
            raise

        return tmp.name

    def _get_audio_suffix(self, url: str) -> str:
        """Get the file extension to use for audio downloaded from *url*.

        Only the path component of the URL is inspected, case-insensitively,
        so query strings and fragments do not hide the extension. Returns
        ``'.mp3'`` when no recognised extension is found.
        """
        from urllib.parse import urlparse

        path = urlparse(url).path.lower()
        for suffix in self._AUDIO_SUFFIXES:
            if path.endswith(suffix):
                return suffix
        return self._DEFAULT_AUDIO_SUFFIX  # Default

    async def health_check(self) -> bool:
        """Check Whisper health.

        Returns ``True`` if the ``tiny`` model can be loaded (or is already
        cached), ``False`` if loading raises.
        """
        try:
            # Check if we can load the tiny model
            await self._load_model("tiny")
            return True
        except Exception:
            return False

    def cleanup(self) -> None:
        """Cleanup resources by dropping all cached models."""
        self._model_cache.clear()
|
||||
Reference in New Issue
Block a user