refactor: flatten CLI directory structure - remove 'box in a box'
BEFORE:
/opt/aitbc/cli/
├── aitbc_cli/        # Python package (box in a box)
│   ├── commands/
│   ├── main.py
│   └── ...
├── setup.py

AFTER:
/opt/aitbc/cli/       # Flat structure
├── commands/         # Direct access
├── main.py           # Direct access
├── auth/
├── config/
├── core/
├── models/
├── utils/
├── plugins.py
└── setup.py

CHANGES MADE:
- Moved all files from aitbc_cli/ to cli/ root
- Fixed all relative imports (from . to absolute imports)
- Updated setup.py entry point: aitbc_cli.main → main
- Added CLI directory to Python path in entry script
- Simplified deployment.py to remove dependency on deleted core.deployment
- Fixed import paths in all command files
- Recreated virtual environment with new structure

BENEFITS:
- Eliminated 'box in a box' nesting
- Simpler directory structure
- Direct access to all modules
- Cleaner imports
- Easier maintenance and development
- CLI works with both 'python main.py' and 'aitbc' commands
This commit is contained in:
470
cli/commands/multimodal.py
Executable file
470
cli/commands/multimodal.py
Executable file
@@ -0,0 +1,470 @@
|
||||
"""Multi-modal processing commands for AITBC CLI"""
|
||||
|
||||
import click
|
||||
import httpx
|
||||
import json
|
||||
import base64
|
||||
import mimetypes
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pathlib import Path
|
||||
from utils import output, error, success, warning
|
||||
|
||||
|
||||
# Root command group of this module; the commands and sub-groups defined below
# attach themselves to it. The docstring is what Click shows as the help text,
# so it is the whole body (no `pass` needed).
@click.group()
def multimodal():
    """Multi-modal agent processing and cross-modal operations"""
|
||||
|
||||
|
||||
@multimodal.command()
@click.option("--name", required=True, help="Multi-modal agent name")
@click.option("--modalities", required=True, help="Comma-separated modalities (text,image,audio,video)")
@click.option("--description", default="", help="Agent description")
@click.option("--model-config", type=click.File('r'), help="Model configuration JSON file")
@click.option("--gpu-acceleration", is_flag=True, help="Enable GPU acceleration")
@click.pass_context
def agent(ctx, name: str, modalities: str, description: str, model_config, gpu_acceleration: bool):
    """Create multi-modal agent"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # Builds the agent definition from the options, POSTs it to
    # `{coordinator_url}/multimodal/agents` and prints the created agent.
    # Every failure path exits with status 1.
    config = ctx.obj['config']

    # Drop blank entries so input such as "text,,image," sends no "" modality.
    modality_list = [mod.strip() for mod in modalities.split(',') if mod.strip()]

    agent_data = {
        "name": name,
        "description": description,
        "modalities": modality_list,
        "gpu_acceleration": gpu_acceleration,
        "agent_type": "multimodal"
    }

    if model_config:
        try:
            agent_data["model_config"] = json.load(model_config)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            error(f"Failed to read model config file: {e}")
            ctx.exit(1)

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents",
                headers={"X-Api-Key": config.api_key or ""},
                json=agent_data
            )

            if response.status_code == 201:
                created = response.json()
                success(f"Multi-modal agent created: {created['id']}")
                output(created, ctx.obj['output_format'])
            else:
                error(f"Failed to create multi-modal agent: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
def _encode_media_file(path: str, fallback_mime: str) -> Dict[str, Any]:
    """Read *path* and return its base64 payload, MIME type and file name.

    *fallback_mime* is used when the MIME type cannot be guessed from the
    file name. Raises OSError if the file cannot be read.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    return {
        "data": base64.b64encode(raw).decode(),
        "mime_type": mimetypes.guess_type(path)[0] or fallback_mime,
        "filename": Path(path).name
    }


@multimodal.command()
@click.argument("agent_id")
@click.option("--text", help="Text input")
@click.option("--image", type=click.Path(exists=True), help="Image file path")
@click.option("--audio", type=click.Path(exists=True), help="Audio file path")
@click.option("--video", type=click.Path(exists=True), help="Video file path")
@click.option("--output-format", default="json", type=click.Choice(["json", "text", "binary"]),
              help="Output format for results")
@click.pass_context
def process(ctx, agent_id: str, text: Optional[str], image: Optional[str],
            audio: Optional[str], video: Optional[str], output_format: str):
    """Process multi-modal inputs with agent"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # Collects the supplied text and media files into one payload, POSTs it to
    # `{coordinator_url}/multimodal/agents/{agent_id}/process` and prints the
    # result. `output_format` is sent to the server; local printing uses
    # ctx.obj['output_format']. Every failure path exits with status 1.
    config = ctx.obj['config']

    # Prepare multi-modal data
    modal_data = {}

    if text:
        modal_data["text"] = text

    # (payload key, file path, MIME type used when guessing fails)
    media_inputs = (
        ("image", image, "image/jpeg"),
        ("audio", audio, "audio/wav"),
        ("video", video, "video/mp4"),
    )
    for kind, path, fallback_mime in media_inputs:
        if not path:
            continue
        try:
            modal_data[kind] = _encode_media_file(path, fallback_mime)
        except OSError as e:
            error(f"Failed to read {kind} file: {e}")
            ctx.exit(1)

    if not modal_data:
        error("At least one modality input must be provided")
        ctx.exit(1)

    process_data = {
        "modalities": modal_data,
        "output_format": output_format
    }

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/process",
                headers={"X-Api-Key": config.api_key or ""},
                json=process_data
            )

            if response.status_code == 200:
                result = response.json()
                success("Multi-modal processing completed")
                output(result, ctx.obj['output_format'])
            else:
                error(f"Failed to process multi-modal inputs: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
@multimodal.command()
@click.argument("agent_id")
@click.option("--dataset", default="coco_vqa", help="Dataset name for benchmarking")
@click.option("--metrics", default="accuracy,latency", help="Comma-separated metrics to evaluate")
@click.option("--iterations", default=100, help="Number of benchmark iterations")
@click.pass_context
def benchmark(ctx, agent_id: str, dataset: str, metrics: str, iterations: int):
    """Benchmark multi-modal agent performance"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # POSTs the benchmark request to
    # `{coordinator_url}/multimodal/agents/{agent_id}/benchmark`; a 202
    # response is treated as "benchmark started" and printed. Any other status
    # or a request error exits with status 1.
    config = ctx.obj['config']

    benchmark_data = {
        "dataset": dataset,
        # Drop blank entries so a stray comma does not send an empty metric.
        "metrics": [m.strip() for m in metrics.split(',') if m.strip()],
        "iterations": iterations
    }

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/benchmark",
                headers={"X-Api-Key": config.api_key or ""},
                json=benchmark_data
            )

            if response.status_code == 202:
                started = response.json()
                success(f"Benchmark started: {started['id']}")
                output(started, ctx.obj['output_format'])
            else:
                error(f"Failed to start benchmark: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
@multimodal.command()
@click.argument("agent_id")
@click.option("--objective", default="throughput",
              type=click.Choice(["throughput", "latency", "accuracy", "efficiency"]),
              help="Optimization objective")
@click.option("--target", help="Target value for optimization")
@click.pass_context
def optimize(ctx, agent_id: str, objective: str, target: Optional[str]):
    """Optimize multi-modal agent pipeline"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # POSTs the objective (plus the target, when given) to
    # `{coordinator_url}/multimodal/agents/{agent_id}/optimize` and prints the
    # result. A non-200 status or a request error exits with status 1.
    config = ctx.obj['config']

    optimization_data = {"objective": objective}
    if target:
        # Sent as the raw string from the command line; no conversion is done here.
        optimization_data["target"] = target

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/optimize",
                headers={"X-Api-Key": config.api_key or ""},
                json=optimization_data
            )

            if response.status_code == 200:
                result = response.json()
                success("Multi-modal optimization completed")
                output(result, ctx.obj['output_format'])
            else:
                error(f"Failed to optimize agent: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
# Sub-group for conversion commands. `multimodal.group()` creates the group and
# registers it on `multimodal` in one step.
# NOTE: the subcommand defined after this group reuses the name `convert`, so
# once the module is imported the module-level name refers to that subcommand;
# the group itself stays reachable through `multimodal`.
@multimodal.group()
def convert():
    """Cross-modal conversion operations"""
|
||||
|
||||
|
||||
@convert.command()
@click.option("--input", "input_path", required=True, type=click.Path(exists=True), help="Input file path")
@click.option("--output", "output_format", required=True,
              type=click.Choice(["text", "image", "audio", "video"]),
              help="Output modality")
@click.option("--model", default="blip", help="Conversion model to use")
@click.option("--output-file", type=click.Path(), help="Output file path")
@click.pass_context
def convert(ctx, input_path: str, output_format: str, model: str, output_file: Optional[str]):
    """Convert between modalities"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # Base64-encodes the input file and POSTs it to
    # `{coordinator_url}/multimodal/convert`. When --output-file is given and
    # the response carries "output_data", the decoded bytes are written there;
    # otherwise the response is printed. Every failure path exits with status 1.
    config = ctx.obj['config']

    # Read input file
    try:
        with open(input_path, 'rb') as f:
            input_data = f.read()
    except OSError as e:
        error(f"Failed to read input file: {e}")
        ctx.exit(1)

    conversion_data = {
        "input": {
            "data": base64.b64encode(input_data).decode(),
            "mime_type": mimetypes.guess_type(input_path)[0] or "application/octet-stream",
            "filename": Path(input_path).name
        },
        # `output_format` here is the --output modality, not the display format.
        "output_modality": output_format,
        "model": model
    }

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/convert",
                headers={"X-Api-Key": config.api_key or ""},
                json=conversion_data
            )

            if response.status_code == 200:
                result = response.json()

                if output_file and result.get("output_data"):
                    # Decode and save output
                    output_data = base64.b64decode(result["output_data"])
                    with open(output_file, 'wb') as f:
                        f.write(output_data)
                    success(f"Conversion output saved to {output_file}")
                else:
                    output(result, ctx.obj['output_format'])
            else:
                error(f"Failed to convert modality: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
# Sub-group for search commands. `multimodal.group()` creates the group and
# registers it on `multimodal` in one step.
# NOTE: the subcommand defined after this group reuses the name `search`, so
# once the module is imported the module-level name refers to that subcommand;
# the group itself stays reachable through `multimodal`.
@multimodal.group()
def search():
    """Multi-modal search operations"""
|
||||
|
||||
|
||||
@search.command()
@click.argument("query")
@click.option("--modalities", default="image,text", help="Comma-separated modalities to search")
@click.option("--limit", default=20, help="Number of results to return")
@click.option("--threshold", default=0.5, help="Similarity threshold")
@click.pass_context
def search(ctx, query: str, modalities: str, limit: int, threshold: float):
    """Multi-modal search across different modalities"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # POSTs the query to `{coordinator_url}/multimodal/search` and prints the
    # results. A non-200 status or a request error exits with status 1.
    config = ctx.obj['config']

    search_data = {
        "query": query,
        # Drop blank entries so a stray comma does not send an empty modality.
        "modalities": [m.strip() for m in modalities.split(',') if m.strip()],
        "limit": limit,
        "threshold": threshold
    }

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/search",
                headers={"X-Api-Key": config.api_key or ""},
                json=search_data
            )

            if response.status_code == 200:
                results = response.json()
                output(results, ctx.obj['output_format'])
            else:
                error(f"Failed to perform multi-modal search: {response.status_code}")
                # Show the server's explanation, as the other commands here do.
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
# Sub-group for attention analysis. `multimodal.group()` creates the group and
# registers it on `multimodal` in one step.
# NOTE: the subcommand defined after this group reuses the name `attention`, so
# once the module is imported the module-level name refers to that subcommand;
# the group itself stays reachable through `multimodal`.
@multimodal.group()
def attention():
    """Cross-modal attention analysis"""
|
||||
|
||||
|
||||
@attention.command()
@click.argument("agent_id")
@click.option("--inputs", type=click.File('r'), required=True, help="Multi-modal inputs JSON file")
@click.option("--visualize", is_flag=True, help="Generate attention visualization")
@click.option("--output", type=click.Path(), help="Output file for visualization")
@click.pass_context
def attention(ctx, agent_id: str, inputs, visualize: bool, output: Optional[str]):
    """Analyze cross-modal attention patterns"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # Loads the inputs JSON, POSTs it to
    # `{coordinator_url}/multimodal/agents/{agent_id}/attention`, then either
    # saves the returned visualization to the --output path or prints the
    # result. Every failure path exits with status 1.

    # The `output` parameter (the --output path) hides the module-level
    # `output` printer inside this function; calling it would raise TypeError
    # on a str or None. Re-import the printer under a different name.
    from utils import output as render_output

    config = ctx.obj['config']

    try:
        inputs_data = json.load(inputs)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        error(f"Failed to read inputs file: {e}")
        ctx.exit(1)

    attention_data = {
        "inputs": inputs_data,
        "visualize": visualize
    }

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/attention",
                headers={"X-Api-Key": config.api_key or ""},
                json=attention_data
            )

            if response.status_code == 200:
                result = response.json()

                if visualize and output and result.get("visualization"):
                    # Save visualization
                    viz_data = base64.b64decode(result["visualization"])
                    with open(output, 'wb') as f:
                        f.write(viz_data)
                    success(f"Attention visualization saved to {output}")
                else:
                    render_output(result, ctx.obj['output_format'])
            else:
                error(f"Failed to analyze attention: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
@multimodal.command()
@click.argument("agent_id")
@click.pass_context
def capabilities(ctx, agent_id: str):
    """List multi-modal agent capabilities"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # GETs `{coordinator_url}/multimodal/agents/{agent_id}/capabilities` and
    # prints the response. A non-200 status or a request error exits with
    # status 1.
    config = ctx.obj['config']

    try:
        with httpx.Client() as client:
            response = client.get(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/capabilities",
                headers={"X-Api-Key": config.api_key or ""}
            )

            if response.status_code == 200:
                agent_capabilities = response.json()
                output(agent_capabilities, ctx.obj['output_format'])
            else:
                error(f"Failed to get agent capabilities: {response.status_code}")
                # Show the server's explanation, as the other commands here do.
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
|
||||
|
||||
@multimodal.command()
@click.argument("agent_id")
@click.option("--modality", required=True,
              type=click.Choice(["text", "image", "audio", "video"]),
              help="Modality to test")
@click.option("--test-data", type=click.File('r'), help="Test data JSON file")
@click.pass_context
def test(ctx, agent_id: str, modality: str, test_data):
    """Test individual modality processing"""
    # The docstring above is the `--help` text, so implementation notes are
    # kept in comments.
    #
    # POSTs the optional test-data JSON (an empty object when none is given) to
    # `{coordinator_url}/multimodal/agents/{agent_id}/test/{modality}` and
    # prints the result. Every failure path exits with status 1.
    config = ctx.obj['config']

    test_input = {}
    if test_data:
        try:
            test_input = json.load(test_data)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            error(f"Failed to read test data file: {e}")
            ctx.exit(1)

    try:
        with httpx.Client() as client:
            response = client.post(
                f"{config.coordinator_url}/multimodal/agents/{agent_id}/test/{modality}",
                headers={"X-Api-Key": config.api_key or ""},
                json=test_input
            )

            if response.status_code == 200:
                result = response.json()
                success(f"Modality test completed for {modality}")
                output(result, ctx.obj['output_format'])
            else:
                error(f"Failed to test modality: {response.status_code}")
                if response.text:
                    error(response.text)
                ctx.exit(1)
    except click.exceptions.Exit:
        # ctx.exit() raises click's Exit, which subclasses RuntimeError. Let it
        # through so the handler below does not print "Network error: 1".
        raise
    except Exception as e:
        error(f"Network error: {e}")
        ctx.exit(1)
|
||||
Reference in New Issue
Block a user