- Restructure .env.example with security-focused documentation, service-specific environment file references, and AWS Secrets Manager integration - Update CLI tests workflow to single Python 3.13 version, add pytest-mock dependency, and consolidate test execution with coverage - Add comprehensive security validation to package publishing workflow with manual approval gates, secret scanning, and release
356 lines
14 KiB
Python
Executable File
356 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Dotenv Linter for AITBC

This script checks for configuration drift between .env.example and actual
environment variable usage in the codebase. It ensures that all environment
variables used in the code are documented in .env.example and vice versa.

Usage:
    python scripts/dotenv_linter.py
    python scripts/dotenv_linter.py --fix
    python scripts/dotenv_linter.py --verbose
    python scripts/dotenv_linter.py --check
"""
|
|
|
|
import argparse
import ast
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
|
|
|
|
|
|
class DotenvLinter:
    """Detect drift between ``.env.example`` and environment variable usage.

    The linter collects the variable names declared in
    ``<project_root>/.env.example`` and compares them with the names
    referenced from Python sources, a fixed list of configuration files, and
    shell scripts found under the project root.
    """

    # Directory names never descended into while walking the project tree.
    # Any directory whose name starts with a dot is skipped as well.
    _EXCLUDED_DIRS = frozenset({
        '__pycache__', 'node_modules', '.git', 'venv', 'env', '.venv',
    })

    # Unanchored on the left, so these also cover the ``os.``-prefixed
    # spellings (``os.getenv``, ``os.environ.get``, ``os.environ[...]``).
    # Group 1 is always the bare variable name, without its quotes.
    _PYTHON_PATTERNS = (
        re.compile(r'getenv\([\'"]([^\'"]+)[\'"]'),
        re.compile(r'environ\.get\([\'"]([^\'"]+)[\'"]'),
        re.compile(r'environ\[[\'"]([^\'"]+)[\'"]\]'),
    )

    # Paths/globs (relative to the project root) scanned as configuration files.
    _CONFIG_GLOBS = (
        'pyproject.toml',
        'pytest.ini',
        'setup.cfg',
        'tox.ini',
        '.github/workflows/*.yml',
        '.github/workflows/*.yaml',
        'docker-compose.yml',
        'docker-compose.yaml',
        'Dockerfile',
    )

    _CONFIG_PATTERNS = (
        # ${VAR_NAME} and parameter expansions such as ${VAR_NAME:-default};
        # the trailing \b rejects mixed-case names like ${VARname}.
        re.compile(r'\$\{([A-Z_][A-Z0-9_]*)\b'),
        re.compile(r'\$([A-Z_][A-Z0-9_]*)'),               # $VAR_NAME
        re.compile(r'env\.([A-Z_][A-Z0-9_]*)'),            # env.VAR_NAME
        re.compile(r'os\.environ\([\'"]([^\'"]+)[\'"]'),   # os.environ("VAR_NAME")
        re.compile(r'getenv\([\'"]([^\'"]+)[\'"]'),        # getenv("VAR_NAME")
    )

    _SHELL_PATTERNS = (
        re.compile(r'\$\{([A-Z_][A-Z0-9_]*)\b'),           # ${VAR_NAME}, ${VAR_NAME:-x}
        re.compile(r'\$([A-Z_][A-Z0-9_]*)'),               # $VAR_NAME
        # VAR_NAME= (also covers "export VAR_NAME="). The lookbehind stops
        # the upper-case tail of a longer identifier (e.g. "fooBAR=") from
        # being reported as a variable of its own.
        re.compile(r'(?<![A-Za-z0-9_])([A-Z_][A-Z0-9_]*)='),
    )

    # Variables normally provided by the OS/shell/CI; never reported as missing.
    _SYSTEM_VARS = frozenset({
        'PATH', 'HOME', 'USER', 'SHELL', 'TERM', 'LANG', 'LC_ALL',
        'PYTHONPATH', 'PYTHONHOME', 'VIRTUAL_ENV', 'CONDA_DEFAULT_ENV',
        'GITHUB_ACTIONS', 'CI', 'TRAVIS', 'APPVEYOR', 'CIRCLECI',
        'HTTP_PROXY', 'HTTPS_PROXY', 'NO_PROXY', 'http_proxy', 'https_proxy',
        'PWD', 'OLDPWD', 'SHLVL', '_', 'HOSTNAME', 'HOSTTYPE', 'OSTYPE',
        'MACHTYPE', 'UID', 'GID', 'EUID', 'EGID', 'PS1', 'PS2', 'IFS',
        'DISPLAY', 'XAUTHORITY', 'DBUS_SESSION_BUS_ADDRESS', 'SSH_AUTH_SOCK',
        'SSH_CONNECTION', 'SSH_CLIENT', 'SSH_TTY', 'LOGNAME', 'USERNAME',
    })

    # Variables that may be consumed by external tools; never reported as unused.
    _EXTERNAL_VARS = frozenset({
        'NODE_ENV', 'NPM_CONFIG_PREFIX', 'NPM_AUTH_TOKEN',
        'DOCKER_HOST', 'DOCKER_TLS_VERIFY', 'DOCKER_CERT_PATH',
        'KUBERNETES_SERVICE_HOST', 'KUBERNETES_SERVICE_PORT',
        'REDIS_URL', 'MEMCACHED_URL', 'ELASTICSEARCH_URL',
        'SENTRY_DSN', 'ROLLBAR_ACCESS_TOKEN', 'HONEYBADGER_API_KEY',
    })

    def __init__(self, project_root: Optional[Path] = None):
        """Initialize the linter.

        Args:
            project_root: Directory to lint. When omitted (or falsy), the
                parent of the directory containing this file is used.
        """
        self.project_root = (
            Path(project_root) if project_root else Path(__file__).parent.parent
        )
        self.env_example_path = self.project_root / ".env.example"
        self.python_files = self._find_python_files()

    # ------------------------------------------------------------------
    # File discovery / reading helpers
    # ------------------------------------------------------------------

    def _walk_files(self, suffixes: Tuple[str, ...]) -> List[Path]:
        """Return files under the project root whose names end with *suffixes*."""
        found: List[Path] = []
        for root, dirs, files in os.walk(self.project_root):
            # Prune in place so os.walk does not descend into excluded dirs.
            dirs[:] = [
                d for d in dirs
                if not d.startswith('.') and d not in self._EXCLUDED_DIRS
            ]
            found.extend(
                Path(root) / name for name in files if name.endswith(suffixes)
            )
        return found

    @staticmethod
    def _read_text(path: Path) -> Optional[str]:
        """Read *path* as UTF-8; print a warning and return None on failure."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        except (UnicodeDecodeError, OSError) as e:
            # OSError (not just PermissionError) so that broken symlinks or
            # directories matching a glob are reported instead of crashing.
            print(f"⚠️ Could not read {path}: {e}")
            return None

    @staticmethod
    def _collect_names(content: str, patterns) -> Set[str]:
        """Return group 1 of every match of every compiled pattern in *content*."""
        return {
            match.group(1)
            for pattern in patterns
            for match in pattern.finditer(content)
        }

    def _find_python_files(self) -> List[Path]:
        """Find all Python files in the project."""
        return self._walk_files(('.py',))

    # ------------------------------------------------------------------
    # Collection of variable names
    # ------------------------------------------------------------------

    def _parse_env_example(self) -> Set[str]:
        """Parse .env.example and extract all environment variable keys.

        Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
        A leading ``export`` keyword is stripped from the key.

        Returns:
            The set of variable names; empty (after printing a message) when
            the file does not exist.
        """
        env_vars: Set[str] = set()

        if not self.env_example_path.exists():
            print(f"❌ .env.example not found at {self.env_example_path}")
            return env_vars

        with open(self.env_example_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith('#') or '=' not in line:
                    continue

                var_name = line.partition('=')[0].strip()
                # "export FOO=bar" declares FOO, not "export FOO".
                parts = var_name.split()
                if len(parts) == 2 and parts[0] == 'export':
                    var_name = parts[1]
                if var_name:
                    env_vars.add(var_name)

        return env_vars

    def _find_env_usage_in_python(self) -> Set[str]:
        """Find all environment variable usage in Python files.

        Scans ``self.python_files`` for ``getenv("X")``, ``environ.get("X")``
        and ``environ["X"]`` with a literal string name.
        """
        env_vars: Set[str] = set()
        for python_file in self.python_files:
            content = self._read_text(python_file)
            if content is not None:
                env_vars |= self._collect_names(content, self._PYTHON_PATTERNS)
        return env_vars

    def _find_env_usage_in_config_files(self) -> Set[str]:
        """Find environment variable usage in configuration files.

        Only names for which ``str.isupper()`` is true are kept.
        """
        env_vars: Set[str] = set()
        for glob_pattern in self._CONFIG_GLOBS:
            for config_file in self.project_root.glob(glob_pattern):
                content = self._read_text(config_file)
                if content is None:
                    continue
                env_vars.update(
                    name
                    for name in self._collect_names(content, self._CONFIG_PATTERNS)
                    if name.isupper()
                )
        return env_vars

    def _find_env_usage_in_shell_scripts(self) -> Set[str]:
        """Find environment variable usage in shell scripts.

        Scans ``.sh``, ``.bash`` and ``.zsh`` files for upper-case
        ``$VAR`` / ``${VAR...}`` references and ``VAR=`` assignments.
        """
        env_vars: Set[str] = set()
        for shell_file in self._walk_files(('.sh', '.bash', '.zsh')):
            content = self._read_text(shell_file)
            if content is not None:
                env_vars |= self._collect_names(content, self._SHELL_PATTERNS)
        return env_vars

    def _find_all_env_usage(self) -> Set[str]:
        """Find all environment variable usage across the project."""
        return (
            self._find_env_usage_in_python()
            | self._find_env_usage_in_config_files()
            | self._find_env_usage_in_shell_scripts()
        )

    # ------------------------------------------------------------------
    # Comparison
    # ------------------------------------------------------------------

    def _check_missing_in_example(self, used_vars: Set[str], example_vars: Set[str]) -> Set[str]:
        """Find variables used in code but missing from .env.example.

        Common system variables (PATH, HOME, CI, ...) are not reported.
        """
        return (used_vars - example_vars) - self._SYSTEM_VARS

    def _check_unused_in_example(self, used_vars: Set[str], example_vars: Set[str]) -> Set[str]:
        """Find variables in .env.example but not used in code.

        Variables that might be used by external tools or services
        (NODE_ENV, DOCKER_HOST, ...) are not reported.
        """
        return (example_vars - used_vars) - self._EXTERNAL_VARS

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def lint(self, verbose: bool = False) -> Tuple[int, int, int, Set[str], Set[str]]:
        """Run the linter and return results.

        Args:
            verbose: When true, also print the variables found on each side.

        Returns:
            ``(example_count, used_count, missing_count, missing_vars,
            unused_vars)``.
        """
        print("🔍 Dotenv Linter for AITBC")
        print("=" * 50)

        example_vars = self._parse_env_example()
        if verbose:
            print(f"📄 Found {len(example_vars)} variables in .env.example")
            if example_vars:
                print(f" {', '.join(sorted(example_vars))}")

        used_vars = self._find_all_env_usage()
        if verbose:
            print(f"🔍 Found {len(used_vars)} variables used in code")
            if used_vars:
                print(f" {', '.join(sorted(used_vars))}")

        missing_vars = self._check_missing_in_example(used_vars, example_vars)
        unused_vars = self._check_unused_in_example(used_vars, example_vars)

        return len(example_vars), len(used_vars), len(missing_vars), missing_vars, unused_vars

    def fix_env_example(self, missing_vars: Set[str], verbose: bool = False):
        """Add missing variables to .env.example.

        Appends a marker comment followed by one empty ``NAME=`` line per
        variable, in sorted order. Does nothing when *missing_vars* is empty.
        """
        if not missing_vars:
            if verbose:
                print("✅ No missing variables to add")
            return

        print(f"🔧 Adding {len(missing_vars)} missing variables to .env.example")

        with open(self.env_example_path, 'a', encoding='utf-8') as f:
            f.write("\n# Auto-generated variables (added by dotenv_linter)\n")
            f.writelines(f"{var}=\n" for var in sorted(missing_vars))

        print(f"✅ Added {len(missing_vars)} variables to .env.example")

    def generate_report(self, example_count: int, used_count: int, missing_count: int,
                        missing_vars: Set[str], unused_vars: Set[str]) -> str:
        """Generate a detailed report.

        Returns:
            A newline-joined, human-readable summary listing the counts and
            the sorted names of missing and unused variables.
        """
        report = [
            "📊 Dotenv Linter Report",
            "=" * 50,
            f"Variables in .env.example: {example_count}",
            f"Variables used in code: {used_count}",
            f"Missing from .env.example: {missing_count}",
            f"Unused in .env.example: {len(unused_vars)}",
            "",
        ]

        if missing_vars:
            report.append("❌ Missing Variables (used in code but not in .env.example):")
            report.extend(f" - {var}" for var in sorted(missing_vars))
            report.append("")

        if unused_vars:
            report.append("⚠️ Unused Variables (in .env.example but not used in code):")
            report.extend(f" - {var}" for var in sorted(unused_vars))
            report.append("")

        if not missing_vars and not unused_vars:
            report.append("✅ No configuration drift detected!")

        return "\n".join(report)
|
|
|
|
|
|
def main():
    """Command-line entry point: lint, report, optionally fix, and flag drift.

    Flags:
        --verbose / -v: forwarded to the linter for extra output.
        --fix: append variables missing from .env.example.
        --check: exit with status 1 when missing or unused variables exist.

    Returns:
        0 when the run completes. With ``--check`` and detected drift the
        process terminates through ``sys.exit(1)`` instead of returning.
    """
    usage_examples = """
Examples:
python scripts/dotenv_linter.py # Check for drift
python scripts/dotenv_linter.py --verbose # Verbose output
python scripts/dotenv_linter.py --fix # Auto-fix missing variables
python scripts/dotenv_linter.py --check # Exit with error code on issues
"""
    cli = argparse.ArgumentParser(
        description="Dotenv Linter for AITBC - Check for configuration drift",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    cli.add_argument("--fix", action="store_true", help="Auto-fix missing variables in .env.example")
    cli.add_argument("--check", action="store_true", help="Exit with error code if issues found")
    options = cli.parse_args()

    linter = DotenvLinter()

    # lint() yields (example_count, used_count, missing_count, missing, unused),
    # which is exactly the positional argument list generate_report() takes.
    results = linter.lint(options.verbose)
    _, _, missing_count, missing_vars, unused_vars = results
    print(linter.generate_report(*results))

    if options.fix and missing_vars:
        linter.fix_env_example(missing_vars, options.verbose)

    drift_found = bool(missing_vars or unused_vars)
    if options.check and drift_found:
        print(f"❌ Configuration drift detected: {missing_count} missing, {len(unused_vars)} unused")
        sys.exit(1)

    print("✅ Dotenv linter completed successfully")
    return 0
|
|
|
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|