Merge pull request 'Secure pickle deserialization in IPFS storage (issue #22)' (#27) from 22-pickle-security into main
Some checks failed
AITBC CI/CD Pipeline / lint-and-test (3.11) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.12) (push) Has been cancelled
AITBC CI/CD Pipeline / lint-and-test (3.13) (push) Has been cancelled
AITBC CI/CD Pipeline / test-cli (push) Has been cancelled
AITBC CI/CD Pipeline / test-services (push) Has been cancelled
AITBC CI/CD Pipeline / test-production-services (push) Has been cancelled
AITBC CI/CD Pipeline / security-scan (push) Has been cancelled
AITBC CI/CD Pipeline / build (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-staging (push) Has been cancelled
AITBC CI/CD Pipeline / deploy-production (push) Has been cancelled
AITBC CI/CD Pipeline / performance-test (push) Has been cancelled
AITBC CI/CD Pipeline / docs (push) Has been cancelled
AITBC CI/CD Pipeline / release (push) Has been cancelled
AITBC CI/CD Pipeline / notify (push) Has been cancelled
Security Scanning / Bandit Security Scan (apps/coordinator-api/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (cli/aitbc_cli) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-core/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-crypto/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (packages/py/aitbc-sdk/src) (push) Has been cancelled
Security Scanning / Bandit Security Scan (tests) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (javascript) (push) Has been cancelled
Security Scanning / CodeQL Security Analysis (python) (push) Has been cancelled
Security Scanning / Dependency Security Scan (push) Has been cancelled
Security Scanning / Container Security Scan (push) Has been cancelled
Security Scanning / OSSF Scorecard (push) Has been cancelled
Security Scanning / Security Summary Report (push) Has been cancelled

Reviewed-on: #27
This commit is contained in:
oib
2026-03-15 22:33:13 +01:00
3 changed files with 40 additions and 5 deletions

View File

@@ -12,6 +12,7 @@ import json
 import hashlib
 import gzip
 import pickle
+from .secure_pickle import safe_loads
 from dataclasses import dataclass, asdict
 try:
@@ -190,8 +191,8 @@ class IPFSStorageService:
             else:
                 decompressed_data = retrieved_data
-            # Deserialize
-            memory_data = pickle.loads(decompressed_data)
+            # Deserialize (using safe unpickler)
+            memory_data = safe_loads(decompressed_data)
             logger.info(f"Retrieved memory for agent {metadata.agent_id}: CID {cid}")
             return memory_data, metadata
@@ -353,7 +354,7 @@ class MemoryCompressionService:
     def decompress_memory(compressed_data: bytes) -> Any:
         """Decompress memory data"""
         decompressed = gzip.decompress(compressed_data)
-        return pickle.loads(decompressed)
+        return safe_loads(decompressed)
     @staticmethod
     def calculate_similarity(data1: Any, data2: Any) -> float:

View File

@@ -7,6 +7,7 @@ import asyncio
 import json
 import logging
 import pickle
+from ...services.secure_pickle import safe_loads
 from typing import Optional, Dict, Any, List
 from dataclasses import dataclass, asdict
 from datetime import datetime, timedelta
@@ -98,7 +99,7 @@ class TranslationCache:
         if cached_data:
             # Deserialize cache entry
-            cache_entry = pickle.loads(cached_data)
+            cache_entry = safe_loads(cached_data)
             # Update access statistics
             cache_entry.access_count += 1
@@ -453,7 +454,7 @@ class TranslationCache:
             try:
                 cached_data = await self.redis.get(key)
                 if cached_data:
-                    cache_entry = pickle.loads(cached_data)
+                    cache_entry = safe_loads(cached_data)
                     export_data.append(asdict(cache_entry))
             except Exception as e:
                 logger.warning(f"Failed to export key {key}: {e}")

View File

@@ -0,0 +1,33 @@
"""
Secure pickle deserialization utilities to prevent arbitrary code execution.
"""
import pickle
import io
from typing import Any
# Safe classes whitelist: builtins and common types
# Whitelist of module -> class names that may be reconstructed during
# unpickling. Only simple data-holding types are listed — nothing that
# can execute code when looked up or instantiated.
# NOTE(review): custom project classes (e.g. dataclass instances cached
# by the translation/IPFS services) are NOT listed here; unpickling them
# will raise UnpicklingError. Callers that pickle their own types must
# extend this table deliberately — confirm against the cache writers.
SAFE_MODULES = {
    'builtins': {
        'list', 'dict', 'set', 'tuple', 'int', 'float', 'str', 'bytes',
        'bool', 'NoneType', 'range', 'slice', 'memoryview', 'complex',
        # frozenset/bytearray are plain data containers that the pickle
        # protocol reconstructs via a builtins lookup; safe to allow.
        'frozenset', 'bytearray',
    },
    'datetime': {'datetime', 'date', 'time', 'timedelta', 'timezone'},
    'collections': {'OrderedDict', 'defaultdict', 'Counter', 'namedtuple'},
    # 'dataclass' is the decorator, not a data type; the pickle protocol
    # never requests it. Kept for backward compatibility of the whitelist.
    'dataclasses': {'dataclass'},
    'typing': {'Any', 'List', 'Dict', 'Tuple', 'Set', 'Optional', 'Union',
               'TypeVar', 'Generic', 'NamedTuple', 'TypedDict'},
}


class RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that refuses to resolve any class outside SAFE_MODULES.

    ``pickle.loads`` on untrusted bytes is arbitrary code execution;
    overriding ``find_class`` is the mechanism the pickle documentation
    recommends ("Restricting Globals") to limit what a stream may reference.
    """

    def find_class(self, module: str, name: str) -> Any:
        """Resolve ``module.name`` only if it is explicitly whitelisted.

        Raises:
            pickle.UnpicklingError: for any non-whitelisted reference.
        """
        if module in SAFE_MODULES and name in SAFE_MODULES[module]:
            return super().find_class(module, name)
        raise pickle.UnpicklingError(
            f"Class {module}.{name} is not allowed for unpickling (security risk)."
        )


def safe_loads(data: bytes) -> Any:
    """Safely deserialize a pickle byte stream.

    Drop-in replacement for ``pickle.loads`` that only reconstructs
    whitelisted types.

    Args:
        data: Raw pickle bytes (any protocol).

    Returns:
        The deserialized object graph.

    Raises:
        pickle.UnpicklingError: if the stream references a class outside
            the SAFE_MODULES whitelist.
    """
    return RestrictedUnpickler(io.BytesIO(data)).load()