Files
aitbc/tests/e2e/test_performance_benchmarks.py
oib 825f157749 Update Python version requirements and fix compatibility issues
- Bump minimum Python version from 3.11 to 3.13 across all apps
- Add Python 3.11-3.13 test matrix to CLI workflow
- Document Python 3.11+ requirement in .env.example
- Fix Starlette Broadcast removal with in-process fallback implementation
- Add _InProcessBroadcast class for tests when Starlette Broadcast is unavailable
- Refactor API key validators to read live settings instead of cached values
- Update database models with explicit
2026-02-24 18:41:08 +01:00

622 lines
25 KiB
Python

"""
Performance Benchmark Tests for Enhanced Services
Validates performance claims from deployment report
"""
import asyncio
import httpx
import pytest
import json
import time
import statistics
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor
import psutil
# Performance targets from deployment report.
# Top-level keys mirror the service names in SERVICES. Each leaf dict holds
# the thresholds a benchmark average is compared against:
#   max_time     - maximum acceptable mean latency, in seconds
#   min_speedup  - minimum acceptable GPU speedup factor (service-reported)
#   min_accuracy / max_memory / min_ratio / max_ratio / max_episodes /
#   min_reward / min_efficiency - declared targets from the report; NOTE:
#   these are not asserted by the benchmarks in this file (latency and
#   speedup are the only checked metrics here).
PERFORMANCE_TARGETS = {
    "multimodal": {
        # Per-modality latency and accuracy targets for the /process endpoint.
        "text_processing": {"max_time": 0.02, "min_accuracy": 0.92},
        "image_processing": {"max_time": 0.15, "min_accuracy": 0.87},
        "audio_processing": {"max_time": 0.22, "min_accuracy": 0.89},
        "video_processing": {"max_time": 0.35, "min_accuracy": 0.85},
        "tabular_processing": {"max_time": 0.05, "min_accuracy": 0.95},
        "graph_processing": {"max_time": 0.08, "min_accuracy": 0.91}
    },
    "gpu_multimodal": {
        # Speedup is relative to CPU baseline; memory is in GB.
        "cross_modal_attention": {"min_speedup": 10.0, "max_memory": 2.5},
        "multi_modal_fusion": {"min_speedup": 20.0, "max_memory": 2.0},
        "feature_extraction": {"min_speedup": 20.0, "max_memory": 3.0},
        "agent_inference": {"min_speedup": 9.0, "max_memory": 1.5},
        "learning_training": {"min_speedup": 9.4, "max_memory": 9.0}
    },
    "modality_optimization": {
        "compression_ratio": {"min_ratio": 0.3, "max_ratio": 0.5},
        "speedup": {"min_speedup": 150.0, "max_speedup": 220.0},
        "accuracy_retention": {"min_accuracy": 0.93}
    },
    "adaptive_learning": {
        "processing_time": {"max_time": 0.12},
        "convergence_episodes": {"max_episodes": 200},
        "final_reward": {"min_reward": 0.85}
    },
    "marketplace_enhanced": {
        "transaction_processing": {"max_time": 0.03},
        "royalty_calculation": {"max_time": 0.01},
        "license_verification": {"max_time": 0.02},
        "analytics_generation": {"max_time": 0.05}
    },
    "openclaw_enhanced": {
        "agent_deployment": {"max_time": 0.05},
        "orchestration_latency": {"max_time": 0.02},
        "edge_deployment": {"max_time": 0.08},
        "hybrid_efficiency": {"min_efficiency": 0.80}
    }
}
# Service endpoints: base URLs of the locally running enhanced services.
# The benchmarks assume these are reachable; setup_test_environment()
# probes each one's /health route before any measurement starts.
SERVICES = {
    "multimodal": "http://localhost:8002",
    "gpu_multimodal": "http://localhost:8003",
    "modality_optimization": "http://localhost:8004",
    "adaptive_learning": "http://localhost:8005",
    "marketplace_enhanced": "http://localhost:8006",
    "openclaw_enhanced": "http://localhost:8007"
}
class PerformanceBenchmarkTester:
    """Performance testing framework for enhanced services.

    Each ``benchmark_*`` coroutine measures latency (or the service-reported
    speedup) of one service and compares the averages against the thresholds
    declared in ``PERFORMANCE_TARGETS``. All interval timing uses the
    monotonic ``time.perf_counter()`` clock instead of ``time.time()``, so
    wall-clock adjustments (NTP, DST) cannot corrupt the measurements.
    """

    def __init__(self):
        # Generous timeout: GPU endpoints may take tens of seconds to warm up.
        self.client = httpx.AsyncClient(timeout=60.0)
        self.results = {}
        self.system_metrics = {}

    async def _timed_post(self, url, payload):
        """POST *payload* as JSON to *url*; return ``(elapsed_seconds, response)``.

        Timing uses ``time.perf_counter()`` (monotonic, high resolution).
        Transport errors propagate to the caller.
        """
        start = time.perf_counter()
        response = await self.client.post(url, json=payload)
        return time.perf_counter() - start, response

    @staticmethod
    def _latency_result(times, target):
        """Summarize latency samples against a ``max_time`` target.

        Returns a dict with the mean latency, the configured target, whether
        the mean meets the target, and the number of samples used.
        """
        avg_time = statistics.mean(times)
        return {
            "avg_time": avg_time,
            "target_time": target["max_time"],
            "meets_target": avg_time <= target["max_time"],
            "samples": len(times)
        }

    async def setup_test_environment(self) -> bool:
        """Record system metrics and verify enough services are healthy.

        Returns True when at least 4 of the configured services answer
        ``/health`` with HTTP 200, False otherwise.
        """
        print("🔧 Setting up performance benchmark environment...")
        # Snapshot host resources so the benchmark numbers can be interpreted
        # in context (e.g. a loaded machine explains slow latencies).
        self.system_metrics = {
            "cpu_cores": psutil.cpu_count(),
            "memory_total_gb": psutil.virtual_memory().total / (1024**3),
            "memory_available_gb": psutil.virtual_memory().available / (1024**3),
            "disk_free_gb": psutil.disk_usage('/').free / (1024**3)
        }
        print(f" 🖥️ System: {self.system_metrics['cpu_cores']} cores, {self.system_metrics['memory_total_gb']:.1f}GB RAM")
        # Probe every service; a transport error counts as "unavailable".
        healthy_services = []
        for service_name, service_url in SERVICES.items():
            try:
                response = await self.client.get(f"{service_url}/health")
                if response.status_code == 200:
                    healthy_services.append(service_name)
                    print(f"{service_name} healthy")
                else:
                    print(f"{service_name} unhealthy: {response.status_code}")
            except Exception as e:
                print(f"{service_name} unavailable: {e}")
        if len(healthy_services) < 4:
            print(f" ⚠️ Only {len(healthy_services)}/{len(SERVICES)} services available")
            return False
        print(" ✅ Performance benchmark environment ready")
        return True

    async def cleanup_test_environment(self):
        """Close the shared HTTP client."""
        await self.client.aclose()

    async def benchmark_multimodal_performance(self) -> Dict[str, Any]:
        """Benchmark multi-modal text and image processing latency.

        Only successful (HTTP 200) requests count toward the averages; a
        modality with zero successful samples is omitted from the result.
        """
        print("\n🤖 Benchmarking Multi-Modal Performance...")
        results = {}
        # Text processing is cheap, so take 10 samples.
        print(" 📝 Testing text processing...")
        text_times = []
        for i in range(10):
            elapsed, response = await self._timed_post(
                f"{SERVICES['multimodal']}/process",
                {
                    "agent_id": f"benchmark-text-{i}",
                    "inputs": {"text": "This is a benchmark test for text processing performance."},
                    "processing_mode": "text_analysis"
                }
            )
            if response.status_code == 200:
                text_times.append(elapsed)
        if text_times:
            target = PERFORMANCE_TARGETS["multimodal"]["text_processing"]
            results["text_processing"] = self._latency_result(text_times, target)
            avg_time = results["text_processing"]["avg_time"]
            status = "" if results["text_processing"]["meets_target"] else ""
            print(f" {status} Text: {avg_time:.3f}s (target: ≤{target['max_time']}s)")
        # Image processing is heavier; fewer samples keep the suite fast.
        print(" 🖼️ Testing image processing...")
        image_times = []
        for i in range(5):
            elapsed, response = await self._timed_post(
                f"{SERVICES['multimodal']}/process",
                {
                    "agent_id": f"benchmark-image-{i}",
                    "inputs": {"image_url": "https://example.com/test-image.jpg", "format": "jpeg"},
                    "processing_mode": "image_analysis"
                }
            )
            if response.status_code == 200:
                image_times.append(elapsed)
        if image_times:
            target = PERFORMANCE_TARGETS["multimodal"]["image_processing"]
            results["image_processing"] = self._latency_result(image_times, target)
            avg_time = results["image_processing"]["avg_time"]
            status = "" if results["image_processing"]["meets_target"] else ""
            print(f" {status} Image: {avg_time:.3f}s (target: ≤{target['max_time']}s)")
        return results

    async def benchmark_gpu_performance(self) -> Dict[str, Any]:
        """Benchmark GPU-accelerated attention and fusion speedups.

        Returns ``{"error": ...}`` when the GPU service or the GPU hardware
        is unavailable; otherwise per-operation speedup summaries. The
        speedup figure is reported by the service itself; only positive
        values are treated as valid samples.
        """
        print("\n🚀 Benchmarking GPU Performance...")
        results = {}
        # Bail out early when the GPU service or the GPU itself is missing.
        gpu_health = await self.client.get(f"{SERVICES['gpu_multimodal']}/health")
        if gpu_health.status_code != 200:
            print(" ❌ GPU service not available")
            return {"error": "GPU service not available"}
        gpu_info = gpu_health.json().get("gpu", {})
        if not gpu_info.get("available", False):
            print(" ❌ GPU not available")
            return {"error": "GPU not available"}
        print(f" 🎮 GPU: {gpu_info.get('name', 'Unknown')} ({gpu_info.get('memory_total_gb', 0)}GB)")
        # Cross-modal attention.
        print(" 🧠 Testing cross-modal attention...")
        attention_speedups = []
        for i in range(5):
            _, gpu_response = await self._timed_post(
                f"{SERVICES['gpu_multimodal']}/attention",
                {
                    "modality_features": {
                        "text": [0.1, 0.2, 0.3, 0.4, 0.5] * 20,
                        "image": [0.5, 0.4, 0.3, 0.2, 0.1] * 20,
                        "audio": [0.3, 0.3, 0.3, 0.3, 0.3] * 20
                    },
                    "attention_config": {"attention_type": "cross_modal", "num_heads": 8}
                }
            )
            if gpu_response.status_code == 200:
                speedup = gpu_response.json().get("speedup", 0)
                if speedup > 0:
                    attention_speedups.append(speedup)
        if attention_speedups:
            avg_speedup = statistics.mean(attention_speedups)
            target = PERFORMANCE_TARGETS["gpu_multimodal"]["cross_modal_attention"]
            results["cross_modal_attention"] = {
                "avg_speedup": avg_speedup,
                "target_speedup": target["min_speedup"],
                "meets_target": avg_speedup >= target["min_speedup"],
                "samples": len(attention_speedups)
            }
            status = "" if results["cross_modal_attention"]["meets_target"] else ""
            print(f" {status} Cross-modal attention: {avg_speedup:.1f}x speedup (target: ≥{target['min_speedup']}x)")
        # Multi-modal fusion, same protocol as attention.
        print(" 🔀 Testing multi-modal fusion...")
        fusion_speedups = []
        for i in range(5):
            _, fusion_response = await self._timed_post(
                f"{SERVICES['gpu_multimodal']}/fusion",
                {
                    "modality_data": {
                        "text_features": [0.1, 0.2, 0.3] * 50,
                        "image_features": [0.4, 0.5, 0.6] * 50,
                        "audio_features": [0.7, 0.8, 0.9] * 50
                    },
                    "fusion_config": {"fusion_type": "attention_based", "output_dim": 256}
                }
            )
            if fusion_response.status_code == 200:
                speedup = fusion_response.json().get("speedup", 0)
                if speedup > 0:
                    fusion_speedups.append(speedup)
        if fusion_speedups:
            avg_speedup = statistics.mean(fusion_speedups)
            target = PERFORMANCE_TARGETS["gpu_multimodal"]["multi_modal_fusion"]
            results["multi_modal_fusion"] = {
                "avg_speedup": avg_speedup,
                "target_speedup": target["min_speedup"],
                "meets_target": avg_speedup >= target["min_speedup"],
                "samples": len(fusion_speedups)
            }
            status = "" if results["multi_modal_fusion"]["meets_target"] else ""
            print(f" {status} Multi-modal fusion: {avg_speedup:.1f}x speedup (target: ≥{target['min_speedup']}x)")
        return results

    async def benchmark_marketplace_performance(self) -> Dict[str, Any]:
        """Benchmark marketplace transaction and royalty-calculation latency.

        Unlike the multi-modal benchmark, every request's latency counts —
        even rejected trades — because response time matters regardless of
        the business outcome.
        """
        print("\n🏪 Benchmarking Marketplace Performance...")
        results = {}
        print(" 💸 Testing transaction processing...")
        transaction_times = []
        for i in range(10):
            elapsed, _ = await self._timed_post(
                f"{SERVICES['marketplace_enhanced']}/v1/trading/execute",
                {
                    "bid_id": f"benchmark-bid-{i}",
                    "buyer_address": "0x1234567890123456789012345678901234567890",
                    "payment_method": "crypto",
                    "amount": 0.1
                }
            )
            # Even if the trade fails, measure response time.
            transaction_times.append(elapsed)
        if transaction_times:
            target = PERFORMANCE_TARGETS["marketplace_enhanced"]["transaction_processing"]
            results["transaction_processing"] = self._latency_result(transaction_times, target)
            avg_time = results["transaction_processing"]["avg_time"]
            status = "" if results["transaction_processing"]["meets_target"] else ""
            print(f" {status} Transaction processing: {avg_time:.3f}s (target: ≤{target['max_time']}s)")
        # Royalty calculation is fast, so take more samples for stability.
        print(" 💰 Testing royalty calculation...")
        royalty_times = []
        for i in range(20):
            elapsed, _ = await self._timed_post(
                f"{SERVICES['marketplace_enhanced']}/v1/analytics/royalties",
                {
                    "model_id": f"benchmark-model-{i}",
                    "transaction_amount": 0.5,
                    "royalty_config": {
                        "creator_percentage": 15.0,
                        "platform_percentage": 5.0
                    }
                }
            )
            royalty_times.append(elapsed)
        if royalty_times:
            target = PERFORMANCE_TARGETS["marketplace_enhanced"]["royalty_calculation"]
            results["royalty_calculation"] = self._latency_result(royalty_times, target)
            avg_time = results["royalty_calculation"]["avg_time"]
            status = "" if results["royalty_calculation"]["meets_target"] else ""
            print(f" {status} Royalty calculation: {avg_time:.3f}s (target: ≤{target['max_time']}s)")
        return results

    async def benchmark_concurrent_performance(self) -> Dict[str, Any]:
        """Benchmark throughput and success rate under concurrent load.

        Ramps through increasing concurrency levels against the multi-modal
        service and records per-level success rate, latency stats, and
        requests-per-second.
        """
        print("\n⚡ Benchmarking Concurrent Performance...")
        results = {}
        print(" 🔄 Testing concurrent multi-modal requests...")

        async def make_request(request_id: int) -> Tuple[float, bool]:
            """Issue one request; return (elapsed_seconds, succeeded)."""
            start = time.perf_counter()
            try:
                response = await self.client.post(
                    f"{SERVICES['multimodal']}/process",
                    json={
                        "agent_id": f"concurrent-test-{request_id}",
                        "inputs": {"text": f"Concurrent test request {request_id}"},
                        "processing_mode": "text_analysis"
                    }
                )
                return (time.perf_counter() - start, response.status_code == 200)
            except Exception:
                # A failed transport still yields a timing sample.
                return (time.perf_counter() - start, False)

        # Ramp concurrency to observe degradation as load increases.
        concurrency_levels = [1, 5, 10, 20]
        for concurrency in concurrency_levels:
            print(f" Testing {concurrency} concurrent requests...")
            start = time.perf_counter()
            request_results = await asyncio.gather(
                *(make_request(i) for i in range(concurrency))
            )
            total_time = time.perf_counter() - start
            times = [elapsed for elapsed, _ in request_results]
            successes = [ok for _, ok in request_results]
            success_rate = sum(successes) / len(successes)
            avg_response_time = statistics.mean(times)
            results[f"concurrent_{concurrency}"] = {
                "concurrency": concurrency,
                "total_time": total_time,
                "success_rate": success_rate,
                "avg_response_time": avg_response_time,
                "max_response_time": max(times),
                "requests_per_second": concurrency / total_time
            }
            status = "" if success_rate >= 0.9 else ""
            print(f" {status} {concurrency} concurrent: {success_rate:.1%} success, {avg_response_time:.3f}s avg")
        return results

    async def run_all_benchmarks(self) -> Dict[str, Any]:
        """Run every benchmark, score the results, and print a summary.

        A benchmark that raises is recorded as ``{"error": ...}`` so one
        failing service does not abort the whole suite. The overall score is
        the fraction of individual checks whose ``meets_target`` is True.
        """
        print("🎯 Starting Performance Benchmark Suite")
        print("="*60)
        benchmark_start = time.perf_counter()
        all_results = {}
        # Dispatch table replaces four identical try/except blocks.
        benchmarks = [
            ("multimodal", self.benchmark_multimodal_performance),
            ("gpu_multimodal", self.benchmark_gpu_performance),
            ("marketplace", self.benchmark_marketplace_performance),
            ("concurrent", self.benchmark_concurrent_performance),
        ]
        for name, benchmark in benchmarks:
            try:
                all_results[name] = await benchmark()
            except Exception as e:
                all_results[name] = {"error": str(e)}
        total_duration = time.perf_counter() - benchmark_start
        # Count every individual check across all services that produced one.
        total_tests = 0
        passed_tests = 0
        for service_results in all_results.values():
            if isinstance(service_results, dict) and "error" not in service_results:
                for test_result in service_results.values():
                    if isinstance(test_result, dict) and "meets_target" in test_result:
                        total_tests += 1
                        if test_result["meets_target"]:
                            passed_tests += 1
        overall_score = passed_tests / total_tests if total_tests > 0 else 0
        print("\n" + "="*60)
        print(" PERFORMANCE BENCHMARK SUMMARY")
        print("="*60)
        print(f"Total Duration: {total_duration:.1f}s")
        print(f"Tests Passed: {passed_tests}/{total_tests}")
        print(f"Performance Score: {overall_score:.1%}")
        print(f"Overall Status: {'✅ EXCELLENT' if overall_score >= 0.9 else '⚠️ GOOD' if overall_score >= 0.7 else '❌ NEEDS IMPROVEMENT'}")
        return {
            "overall_score": overall_score,
            "total_duration": total_duration,
            "tests_passed": passed_tests,
            "total_tests": total_tests,
            "system_metrics": self.system_metrics,
            "results": all_results
        }
# Pytest test functions
@pytest.mark.asyncio
@pytest.mark.e2e
@pytest.mark.performance
async def test_multimodal_performance_benchmarks():
    """Verify multi-modal service latency meets the deployment targets."""
    tester = PerformanceBenchmarkTester()
    try:
        if not await tester.setup_test_environment():
            pytest.skip("Services not available for performance testing")
        results = await tester.benchmark_multimodal_performance()
        # Only assert on benchmarks that actually produced samples.
        if "text_processing" in results:
            assert results["text_processing"]["meets_target"], f"Text processing too slow: {results['text_processing']['avg_time']:.3f}s"
        if "image_processing" in results:
            assert results["image_processing"]["meets_target"], f"Image processing too slow: {results['image_processing']['avg_time']:.3f}s"
        print("✅ Multi-modal performance benchmarks passed")
    finally:
        # Always release the HTTP client, even when assertions fail.
        await tester.cleanup_test_environment()
@pytest.mark.asyncio
@pytest.mark.e2e
@pytest.mark.performance
async def test_gpu_acceleration_benchmarks():
    """Verify GPU acceleration speedups meet the deployment targets."""
    tester = PerformanceBenchmarkTester()
    try:
        if not await tester.setup_test_environment():
            pytest.skip("Services not available for performance testing")
        results = await tester.benchmark_gpu_performance()
        # GPU hardware is optional in some environments: skip, don't fail.
        if "error" in results:
            pytest.skip("GPU not available for testing")
        if "cross_modal_attention" in results:
            assert results["cross_modal_attention"]["meets_target"], f"Cross-modal attention speedup too low: {results['cross_modal_attention']['avg_speedup']:.1f}x"
        if "multi_modal_fusion" in results:
            assert results["multi_modal_fusion"]["meets_target"], f"Multi-modal fusion speedup too low: {results['multi_modal_fusion']['avg_speedup']:.1f}x"
        print("✅ GPU acceleration benchmarks passed")
    finally:
        # Always release the HTTP client, even when assertions fail.
        await tester.cleanup_test_environment()
@pytest.mark.asyncio
@pytest.mark.e2e
@pytest.mark.performance
async def test_marketplace_performance_benchmarks():
    """Verify marketplace transaction/royalty latency meets the targets."""
    tester = PerformanceBenchmarkTester()
    try:
        if not await tester.setup_test_environment():
            pytest.skip("Services not available for performance testing")
        results = await tester.benchmark_marketplace_performance()
        # Only assert on benchmarks that actually produced samples.
        if "transaction_processing" in results:
            assert results["transaction_processing"]["meets_target"], f"Transaction processing too slow: {results['transaction_processing']['avg_time']:.3f}s"
        if "royalty_calculation" in results:
            assert results["royalty_calculation"]["meets_target"], f"Royalty calculation too slow: {results['royalty_calculation']['avg_time']:.3f}s"
        print("✅ Marketplace performance benchmarks passed")
    finally:
        # Always release the HTTP client, even when assertions fail.
        await tester.cleanup_test_environment()
@pytest.mark.asyncio
@pytest.mark.e2e
@pytest.mark.performance
async def test_concurrent_performance_benchmarks():
    """Verify success rate stays acceptable under concurrent load."""
    tester = PerformanceBenchmarkTester()
    try:
        if not await tester.setup_test_environment():
            pytest.skip("Services not available for performance testing")
        results = await tester.benchmark_concurrent_performance()
        # Every concurrency level must keep at least an 80% success rate.
        for concurrency_level, result in results.items():
            if isinstance(result, dict):
                assert result["success_rate"] >= 0.8, f"Success rate too low for {concurrency_level}: {result['success_rate']:.1%}"
        print("✅ Concurrent performance benchmarks passed")
    finally:
        # Always release the HTTP client, even when assertions fail.
        await tester.cleanup_test_environment()
@pytest.mark.asyncio
@pytest.mark.e2e
@pytest.mark.performance
async def test_complete_performance_suite():
    """Run complete performance benchmark suite"""
    bench = PerformanceBenchmarkTester()
    try:
        ready = await bench.setup_test_environment()
        if not ready:
            pytest.skip("Services not available for performance testing")
        summary = await bench.run_all_benchmarks()
        # The suite must both score well enough and finish within budget.
        score = summary["overall_score"]
        assert score >= 0.6, f"Overall performance score too low: {score:.1%}"
        assert summary["total_duration"] < 300.0, "Performance suite took too long"
        print(f"✅ Complete performance suite: {score:.1%} score")
    finally:
        await bench.cleanup_test_environment()
if __name__ == "__main__":
    # Allow running the benchmark suite directly, outside pytest.
    async def main():
        """Set up the environment, run all benchmarks, print a summary."""
        tester = PerformanceBenchmarkTester()
        try:
            if await tester.setup_test_environment():
                results = await tester.run_all_benchmarks()
                print("\n🎯 Performance Benchmark Complete:")
                print(f"Score: {results['overall_score']:.1%}")
                print(f"Duration: {results['total_duration']:.1f}s")
                print(f"Tests: {results['tests_passed']}/{results['total_tests']}")
        finally:
            # Always release the HTTP client, even on failure.
            await tester.cleanup_test_environment()
    asyncio.run(main())