"""Multi-modal processing commands for AITBC CLI""" import click import httpx import json import base64 import mimetypes from typing import Optional, Dict, Any, List from pathlib import Path from utils import output, error, success, warning @click.group() def multimodal(): """Multi-modal agent processing and cross-modal operations""" pass @multimodal.command() @click.option("--name", required=True, help="Multi-modal agent name") @click.option("--modalities", required=True, help="Comma-separated modalities (text,image,audio,video)") @click.option("--description", default="", help="Agent description") @click.option("--model-config", type=click.File('r'), help="Model configuration JSON file") @click.option("--gpu-acceleration", is_flag=True, help="Enable GPU acceleration") @click.pass_context def agent(ctx, name: str, modalities: str, description: str, model_config, gpu_acceleration: bool): """Create multi-modal agent""" config = ctx.obj['config'] modality_list = [mod.strip() for mod in modalities.split(',')] agent_data = { "name": name, "description": description, "modalities": modality_list, "gpu_acceleration": gpu_acceleration, "agent_type": "multimodal" } if model_config: try: config_data = json.load(model_config) agent_data["model_config"] = config_data except Exception as e: error(f"Failed to read model config file: {e}") return try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents", headers={"X-Api-Key": config.api_key or ""}, json=agent_data ) if response.status_code == 201: agent = response.json() success(f"Multi-modal agent created: {agent['id']}") output(agent, ctx.obj['output_format']) else: error(f"Failed to create multi-modal agent: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @multimodal.command() @click.argument("agent_id") @click.option("--text", help="Text input") @click.option("--image", type=click.Path(exists=True), help="Image file path") @click.option("--audio", type=click.Path(exists=True), help="Audio file path") @click.option("--video", type=click.Path(exists=True), help="Video file path") @click.option("--output-format", default="json", type=click.Choice(["json", "text", "binary"]), help="Output format for results") @click.pass_context def process(ctx, agent_id: str, text: Optional[str], image: Optional[str], audio: Optional[str], video: Optional[str], output_format: str): """Process multi-modal inputs with agent""" config = ctx.obj['config'] # Prepare multi-modal data modal_data = {} if text: modal_data["text"] = text if image: try: with open(image, 'rb') as f: image_data = f.read() modal_data["image"] = { "data": base64.b64encode(image_data).decode(), "mime_type": mimetypes.guess_type(image)[0] or "image/jpeg", "filename": Path(image).name } except Exception as e: error(f"Failed to read image file: {e}") return if audio: try: with open(audio, 'rb') as f: audio_data = f.read() modal_data["audio"] = { "data": base64.b64encode(audio_data).decode(), "mime_type": mimetypes.guess_type(audio)[0] or "audio/wav", "filename": Path(audio).name } except Exception as e: error(f"Failed to read audio file: {e}") return if video: try: with open(video, 'rb') as f: video_data = f.read() modal_data["video"] = { "data": base64.b64encode(video_data).decode(), "mime_type": mimetypes.guess_type(video)[0] or "video/mp4", "filename": Path(video).name } except Exception as e: error(f"Failed to read video file: {e}") return if not modal_data: error("At least one modality input must be provided") return process_data = { "modalities": modal_data, "output_format": output_format } try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents/{agent_id}/process", headers={"X-Api-Key": config.api_key or ""}, json=process_data ) if response.status_code == 200: result = response.json() success("Multi-modal processing completed") output(result, ctx.obj['output_format']) else: error(f"Failed to process multi-modal inputs: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @multimodal.command() @click.argument("agent_id") @click.option("--dataset", default="coco_vqa", help="Dataset name for benchmarking") @click.option("--metrics", default="accuracy,latency", help="Comma-separated metrics to evaluate") @click.option("--iterations", default=100, help="Number of benchmark iterations") @click.pass_context def benchmark(ctx, agent_id: str, dataset: str, metrics: str, iterations: int): """Benchmark multi-modal agent performance""" config = ctx.obj['config'] benchmark_data = { "dataset": dataset, "metrics": [m.strip() for m in metrics.split(',')], "iterations": iterations } try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents/{agent_id}/benchmark", headers={"X-Api-Key": config.api_key or ""}, json=benchmark_data ) if response.status_code == 202: benchmark = response.json() success(f"Benchmark started: {benchmark['id']}") output(benchmark, ctx.obj['output_format']) else: error(f"Failed to start benchmark: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @multimodal.command() @click.argument("agent_id") @click.option("--objective", default="throughput", type=click.Choice(["throughput", "latency", "accuracy", "efficiency"]), help="Optimization objective") @click.option("--target", help="Target value for optimization") @click.pass_context def optimize(ctx, agent_id: str, objective: str, target: Optional[str]): """Optimize multi-modal agent pipeline""" config = ctx.obj['config'] optimization_data = {"objective": objective} if target: optimization_data["target"] = target try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents/{agent_id}/optimize", headers={"X-Api-Key": config.api_key or ""}, json=optimization_data ) if response.status_code == 200: result = response.json() success(f"Multi-modal optimization completed") output(result, ctx.obj['output_format']) else: error(f"Failed to optimize agent: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @click.group() def convert(): """Cross-modal conversion operations""" pass multimodal.add_command(convert) @convert.command() @click.option("--input", "input_path", required=True, type=click.Path(exists=True), help="Input file path") @click.option("--output", "output_format", required=True, type=click.Choice(["text", "image", "audio", "video"]), help="Output modality") @click.option("--model", default="blip", help="Conversion model to use") @click.option("--output-file", type=click.Path(), help="Output file path") @click.pass_context def convert(ctx, input_path: str, output_format: str, model: str, output_file: Optional[str]): """Convert between modalities""" config = ctx.obj['config'] # Read input file try: with open(input_path, 'rb') as f: input_data = f.read() except Exception as e: error(f"Failed to read input file: {e}") return conversion_data = { "input": { "data": base64.b64encode(input_data).decode(), "mime_type": mimetypes.guess_type(input_path)[0] or "application/octet-stream", "filename": Path(input_path).name }, "output_modality": output_format, "model": model } try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/convert", headers={"X-Api-Key": config.api_key or ""}, json=conversion_data ) if response.status_code == 200: result = response.json() if output_file and result.get("output_data"): # Decode and save output output_data = base64.b64decode(result["output_data"]) with open(output_file, 'wb') as f: f.write(output_data) success(f"Conversion output saved to {output_file}") else: output(result, ctx.obj['output_format']) else: error(f"Failed to convert modality: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @click.group() def search(): """Multi-modal search operations""" pass multimodal.add_command(search) @search.command() @click.argument("query") @click.option("--modalities", default="image,text", help="Comma-separated modalities to search") @click.option("--limit", default=20, help="Number of results to return") @click.option("--threshold", default=0.5, help="Similarity threshold") @click.pass_context def search(ctx, query: str, modalities: str, limit: int, threshold: float): """Multi-modal search across different modalities""" config = ctx.obj['config'] search_data = { "query": query, "modalities": [m.strip() for m in modalities.split(',')], "limit": limit, "threshold": threshold } try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/search", headers={"X-Api-Key": config.api_key or ""}, json=search_data ) if response.status_code == 200: results = response.json() output(results, ctx.obj['output_format']) else: error(f"Failed to perform multi-modal search: {response.status_code}") ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @click.group() def attention(): """Cross-modal attention analysis""" pass multimodal.add_command(attention) @attention.command() @click.argument("agent_id") @click.option("--inputs", type=click.File('r'), required=True, help="Multi-modal inputs JSON file") @click.option("--visualize", is_flag=True, help="Generate attention visualization") @click.option("--output", type=click.Path(), help="Output file for visualization") @click.pass_context def attention(ctx, agent_id: str, inputs, visualize: bool, output: Optional[str]): """Analyze cross-modal attention patterns""" config = ctx.obj['config'] try: inputs_data = json.load(inputs) except Exception as e: error(f"Failed to read inputs file: {e}") return attention_data = { "inputs": inputs_data, "visualize": visualize } try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents/{agent_id}/attention", headers={"X-Api-Key": config.api_key or ""}, json=attention_data ) if response.status_code == 200: result = response.json() if visualize and output and result.get("visualization"): # Save visualization viz_data = base64.b64decode(result["visualization"]) with open(output, 'wb') as f: f.write(viz_data) success(f"Attention visualization saved to {output}") else: output(result, ctx.obj['output_format']) else: error(f"Failed to analyze attention: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @multimodal.command() @click.argument("agent_id") @click.pass_context def capabilities(ctx, agent_id: str): """List multi-modal agent capabilities""" config = ctx.obj['config'] try: with httpx.Client() as client: response = client.get( f"{config.coordinator_url}/multimodal/agents/{agent_id}/capabilities", headers={"X-Api-Key": config.api_key or ""} ) if response.status_code == 200: capabilities = response.json() output(capabilities, ctx.obj['output_format']) else: error(f"Failed to get agent capabilities: {response.status_code}") ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1) @multimodal.command() @click.argument("agent_id") @click.option("--modality", required=True, type=click.Choice(["text", "image", "audio", "video"]), help="Modality to test") @click.option("--test-data", type=click.File('r'), help="Test data JSON file") @click.pass_context def test(ctx, agent_id: str, modality: str, test_data): """Test individual modality processing""" config = ctx.obj['config'] test_input = {} if test_data: try: test_input = json.load(test_data) except Exception as e: error(f"Failed to read test data file: {e}") return try: with httpx.Client() as client: response = client.post( f"{config.coordinator_url}/multimodal/agents/{agent_id}/test/{modality}", headers={"X-Api-Key": config.api_key or ""}, json=test_input ) if response.status_code == 200: result = response.json() success(f"Modality test completed for {modality}") output(result, ctx.obj['output_format']) else: error(f"Failed to test modality: {response.status_code}") if response.text: error(response.text) ctx.exit(1) except Exception as e: error(f"Network error: {e}") ctx.exit(1)