diff --git a/apps/coordinator-api/src/app/services/ipfs_storage_service.py b/apps/coordinator-api/src/app/services/ipfs_storage_service.py
index edb47d3f..3828e012 100755
--- a/apps/coordinator-api/src/app/services/ipfs_storage_service.py
+++ b/apps/coordinator-api/src/app/services/ipfs_storage_service.py
@@ -12,6 +12,7 @@ import json
 import hashlib
 import gzip
 import pickle
+from .secure_pickle import safe_loads
 from dataclasses import dataclass, asdict
 
 try:
@@ -190,8 +191,8 @@ class IPFSStorageService:
             else:
                 decompressed_data = retrieved_data
 
-            # Deserialize
-            memory_data = pickle.loads(decompressed_data)
+            # Deserialize (using safe unpickler)
+            memory_data = safe_loads(decompressed_data)
 
             logger.info(f"Retrieved memory for agent {metadata.agent_id}: CID {cid}")
             return memory_data, metadata
@@ -353,7 +354,7 @@ class MemoryCompressionService:
     def decompress_memory(compressed_data: bytes) -> Any:
         """Decompress memory data"""
         decompressed = gzip.decompress(compressed_data)
-        return pickle.loads(decompressed)
+        return safe_loads(decompressed)
 
     @staticmethod
     def calculate_similarity(data1: Any, data2: Any) -> float:
diff --git a/apps/coordinator-api/src/app/services/multi_language/translation_cache.py b/apps/coordinator-api/src/app/services/multi_language/translation_cache.py
index 275ebeb7..a292157b 100755
--- a/apps/coordinator-api/src/app/services/multi_language/translation_cache.py
+++ b/apps/coordinator-api/src/app/services/multi_language/translation_cache.py
@@ -7,6 +7,7 @@ import asyncio
 import json
 import logging
 import pickle
+from ...services.secure_pickle import safe_loads
 from typing import Optional, Dict, Any, List
 from dataclasses import dataclass, asdict
 from datetime import datetime, timedelta
@@ -98,7 +99,7 @@ class TranslationCache:
 
         if cached_data:
             # Deserialize cache entry
-            cache_entry = pickle.loads(cached_data)
+            cache_entry = safe_loads(cached_data)
 
             # Update access statistics
             cache_entry.access_count += 1
@@ -453,7 +454,7 @@ class TranslationCache:
             try:
                 cached_data = await self.redis.get(key)
                 if cached_data:
-                    cache_entry = pickle.loads(cached_data)
+                    cache_entry = safe_loads(cached_data)
                     export_data.append(asdict(cache_entry))
             except Exception as e:
                 logger.warning(f"Failed to export key {key}: {e}")
diff --git a/apps/coordinator-api/src/app/services/secure_pickle.py b/apps/coordinator-api/src/app/services/secure_pickle.py
new file mode 100644
index 00000000..2bbcc81e
--- /dev/null
+++ b/apps/coordinator-api/src/app/services/secure_pickle.py
@@ -0,0 +1,54 @@
+"""
+Secure pickle deserialization utilities to prevent arbitrary code execution.
+
+pickle.loads() on untrusted bytes can execute arbitrary code; this module
+provides a default-deny unpickler that only reconstructs whitelisted classes.
+NOTE(review): services that pickle their own dataclasses (cache entries,
+memory snapshots) must register them via register_safe_class() before
+calling safe_loads(), otherwise legitimate cached payloads will be
+rejected with pickle.UnpicklingError -- verify against each caller.
+"""
+
+import io
+import pickle
+from typing import Any, Type
+
+# Safe classes whitelist: builtins and common stdlib types.
+# NOTE(review): pickled instances of project dataclasses reference their
+# *own* class (module + qualname), never dataclasses.dataclass or the
+# typing helpers, so those entries are inert placeholders; whitelist
+# concrete classes via register_safe_class() instead.
+SAFE_MODULES = {
+    'builtins': {
+        'list', 'dict', 'set', 'tuple', 'int', 'float', 'str', 'bytes',
+        'bool', 'NoneType', 'range', 'slice', 'memoryview', 'complex',
+        'frozenset', 'bytearray',
+    },
+    'datetime': {'datetime', 'date', 'time', 'timedelta', 'timezone'},
+    'collections': {'OrderedDict', 'defaultdict', 'Counter', 'namedtuple', 'deque'},
+    'dataclasses': {'dataclass'},
+    'typing': {'Any', 'List', 'Dict', 'Tuple', 'Set', 'Optional', 'Union', 'TypeVar', 'Generic', 'NamedTuple', 'TypedDict'},
+}
+
+
+def register_safe_class(cls: Type) -> None:
+    """Whitelist a specific class (e.g. a cached dataclass) for unpickling."""
+    SAFE_MODULES.setdefault(cls.__module__, set()).add(cls.__qualname__)
+
+
+class RestrictedUnpickler(pickle.Unpickler):
+    """
+    Unpickler that restricts which classes can be instantiated.
+    Only allows classes from the SAFE_MODULES whitelist.
+    """
+
+    def find_class(self, module: str, name: str) -> Any:
+        # Default-deny: anything not explicitly whitelisted is rejected.
+        if module in SAFE_MODULES and name in SAFE_MODULES[module]:
+            return super().find_class(module, name)
+        raise pickle.UnpicklingError(f"Class {module}.{name} is not allowed for unpickling (security risk).")
+
+
+def safe_loads(data: bytes) -> Any:
+    """Safely deserialize a pickle byte stream."""
+    return RestrictedUnpickler(io.BytesIO(data)).load()