# aitbc/apps/miner-node/plugins/whisper.py

"""
Whisper speech recognition plugin
"""

import asyncio
import os
import subprocess
import tempfile
import time
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

from .base import GPUPlugin, PluginResult
from .exceptions import PluginExecutionError


class WhisperPlugin(GPUPlugin):
    """Plugin for Whisper speech recognition.

    Accepts a request dict with either ``audio_file`` (a local path) or
    ``audio_url`` (downloaded to a temporary file), plus the optional keys
    ``model``, ``task``, ``language`` and ``temperature``. Loaded models are
    kept in a per-instance cache keyed by model name.
    """

    # Model names accepted by ``validate_request``.
    _VALID_MODELS = ("tiny", "base", "small", "medium", "large", "large-v2", "large-v3")
    # Tasks accepted by ``validate_request``.
    _VALID_TASKS = ("transcribe", "translate")
    # Extensions preserved on the temporary download; anything else falls
    # back to ``_DEFAULT_AUDIO_SUFFIX``.
    _AUDIO_SUFFIXES = (".mp3", ".wav", ".m4a", ".flac")
    _DEFAULT_AUDIO_SUFFIX = ".mp3"
    # (connect, read) timeouts in seconds so a stalled server cannot hang a job.
    _DOWNLOAD_TIMEOUT = (10, 60)
    _DOWNLOAD_CHUNK_SIZE = 8192

    def __init__(self):
        super().__init__()
        self.service_id = "whisper"
        self.name = "Whisper Speech Recognition"
        self.version = "1.0.0"
        self.description = "Transcribe and translate audio files using OpenAI Whisper"
        self.capabilities = ["transcribe", "translate"]
        # model name -> loaded Whisper model
        self._model_cache = {}

    def setup(self) -> None:
        """Initialize Whisper dependencies.

        Imports the ``whisper`` package (stored on ``self.whisper``) and
        verifies that an ``ffmpeg`` executable can be run.

        Raises:
            PluginExecutionError: If ``whisper`` cannot be imported or
                ``ffmpeg -version`` cannot be executed successfully.
        """
        super().setup()

        # Imported lazily so the module itself loads without whisper installed.
        try:
            import whisper
        except ImportError as exc:
            raise PluginExecutionError(
                "Whisper not installed. Install with: pip install openai-whisper"
            ) from exc
        self.whisper = whisper

        # OSError covers both a missing binary (FileNotFoundError) and one
        # that exists but cannot be executed.
        try:
            subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, OSError) as exc:
            raise PluginExecutionError(
                "FFmpeg not found. Install FFmpeg for audio processing"
            ) from exc

    def validate_request(self, request: Dict[str, Any]) -> List[str]:
        """Validate Whisper request parameters.

        Args:
            request: Request dict as passed to ``execute``.

        Returns:
            A list of human-readable error messages; empty when the request
            is valid.
        """
        errors = []

        # An audio source is the only required parameter.
        if "audio_url" not in request and "audio_file" not in request:
            errors.append("Either 'audio_url' or 'audio_file' must be provided")

        if request.get("model", "base") not in self._VALID_MODELS:
            errors.append(f"Invalid model. Must be one of: {', '.join(self._VALID_MODELS)}")

        if request.get("task", "transcribe") not in self._VALID_TASKS:
            errors.append("Task must be 'transcribe' or 'translate'")

        # ``None`` is treated like an absent key: ``execute`` forwards it to
        # Whisper as "no language hint".
        language = request.get("language")
        if language is not None and (not isinstance(language, str) or len(language) != 2):
            errors.append("Language must be a 2-letter language code (e.g., 'en', 'es')")

        return errors

    def get_hardware_requirements(self) -> Dict[str, Any]:
        """Get hardware requirements for Whisper."""
        return {
            "gpu": "recommended",
            "vram_gb": 2,
            "ram_gb": 4,
            "storage_gb": 1
        }

    async def execute(self, request: Dict[str, Any]) -> PluginResult:
        """Execute Whisper transcription or translation.

        Args:
            request: Dict with ``audio_file`` or ``audio_url`` and optional
                ``model`` (default ``"base"``), ``task`` (default
                ``"transcribe"``), ``language`` and ``temperature``
                (default ``0.0``).

        Returns:
            A ``PluginResult``. On success ``data`` holds ``text``,
            ``language`` and ``segments``; on any failure ``success`` is
            False and ``error`` carries the message. Exceptions are not
            propagated to the caller.
        """
        start_time = time.perf_counter()
        temp_path = None  # set only when this call downloaded the audio

        try:
            errors = self.validate_request(request)
            if errors:
                return PluginResult(
                    success=False,
                    error=f"Validation failed: {'; '.join(errors)}"
                )

            model_name = request.get("model", "base")
            task = request.get("task", "transcribe")
            language = request.get("language")
            temperature = request.get("temperature", 0.0)

            model = await self._load_model(model_name)

            audio_path = await self._get_audio_file(request)
            if "audio_file" not in request:
                # The file was downloaded by us, so we own its cleanup.
                temp_path = audio_path

            # Inference is blocking; run it off the event loop. The language
            # hint is forwarded for both tasks (for "translate" it names the
            # source language).
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: model.transcribe(
                    audio_path,
                    task=task,
                    language=language,
                    temperature=temperature
                )
            )

            execution_time = time.perf_counter() - start_time
            audio_duration = self._audio_duration(result)
            if audio_duration and execution_time > 0:
                real_time_factor = audio_duration / execution_time
            else:
                real_time_factor = 0

            return PluginResult(
                success=True,
                data={
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments", [])
                },
                metrics={
                    "model": model_name,
                    "task": task,
                    "audio_duration": audio_duration,
                    "processing_time": execution_time,
                    "real_time_factor": real_time_factor
                },
                execution_time=execution_time
            )

        except Exception as e:
            # Plugin boundary: report the failure instead of raising.
            return PluginResult(
                success=False,
                error=str(e) or type(e).__name__,
                execution_time=time.perf_counter() - start_time
            )

        finally:
            # Runs on success and failure alike so downloads never leak.
            if temp_path is not None:
                self._remove_file(temp_path)

    @staticmethod
    def _audio_duration(result: Dict[str, Any]) -> Optional[float]:
        """Return the audio length in seconds, or None when unknown.

        Uses ``result["duration"]`` when present; otherwise falls back to the
        ``end`` timestamp of the last segment.
        NOTE(review): Whisper's result dict does not appear to include a
        "duration" key, so the fallback is expected to be the common path.
        """
        duration = result.get("duration")
        if duration is None:
            segments = result.get("segments") or []
            if segments:
                duration = segments[-1].get("end")
        return duration

    @staticmethod
    def _remove_file(path: str) -> None:
        """Delete ``path``, ignoring filesystem errors (best-effort cleanup)."""
        try:
            os.unlink(path)
        except OSError:
            # Already gone or not removable; nothing useful to do here.
            pass

    async def _load_model(self, model_name: str):
        """Load a Whisper model, reusing a cached instance when available.

        Args:
            model_name: Name passed to ``whisper.load_model``.

        Returns:
            The loaded model object.

        Raises:
            PluginExecutionError: If ``setup()`` has not stored the whisper
                module on the instance yet.
        """
        if model_name not in self._model_cache:
            whisper_module = getattr(self, "whisper", None)
            if whisper_module is None:
                raise PluginExecutionError("Whisper is not initialized. Call setup() first")

            # Loading is blocking, so keep it off the event loop.
            loop = asyncio.get_running_loop()
            self._model_cache[model_name] = await loop.run_in_executor(
                None,
                lambda: whisper_module.load_model(model_name)
            )

        return self._model_cache[model_name]

    async def _get_audio_file(self, request: Dict[str, Any]) -> str:
        """Get a local audio path from a direct file path or a URL.

        Args:
            request: Request dict; ``audio_file`` takes precedence over
                ``audio_url``.

        Returns:
            ``request["audio_file"]`` unchanged, or the path of a temporary
            file holding the downloaded audio. The caller is responsible for
            deleting the temporary file.
        """
        if "audio_file" in request:
            return request["audio_file"]

        # The download uses the blocking ``requests`` API, so run it in the
        # default executor instead of stalling the event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._download_audio, request["audio_url"]
        )

    def _download_audio(self, audio_url: str) -> str:
        """Download ``audio_url`` to a temporary file and return its path.

        Blocking helper for ``_get_audio_file``. The partially written file
        is removed if the download fails.

        NOTE(review): ``audio_url`` comes straight from the request; if
        requests are untrusted this permits fetching internal addresses
        (SSRF). Consider restricting allowed hosts upstream.
        """
        import requests

        suffix = self._get_audio_suffix(audio_url)
        with requests.get(audio_url, stream=True, timeout=self._DOWNLOAD_TIMEOUT) as response:
            response.raise_for_status()

            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
            try:
                with tmp:
                    for chunk in response.iter_content(chunk_size=self._DOWNLOAD_CHUNK_SIZE):
                        tmp.write(chunk)
            except BaseException:
                # delete=False means nobody else will clean up a partial file.
                self._remove_file(tmp.name)
                raise
            return tmp.name

    def _get_audio_suffix(self, url: str) -> str:
        """Get the file extension to use for audio downloaded from ``url``.

        Only the URL path is inspected, so query strings and fragments are
        ignored, and matching is case-insensitive. Unrecognized or missing
        extensions fall back to ``.mp3``.
        """
        extension = os.path.splitext(urlparse(url).path)[1].lower()
        if extension in self._AUDIO_SUFFIXES:
            return extension
        return self._DEFAULT_AUDIO_SUFFIX

    async def health_check(self) -> bool:
        """Check Whisper health by loading (or reusing) the ``tiny`` model.

        Returns:
            True if the model is available, False if loading raised.
        """
        try:
            await self._load_model("tiny")
        except Exception:
            return False
        return True

    def cleanup(self) -> None:
        """Cleanup resources by dropping all cached models."""
        self._model_cache.clear()