Files
at1-workstation-scripts/nvidia-repair.sh
2026-04-17 01:40:23 +02:00

122 lines
3.5 KiB
Bash
Executable File

#!/bin/zsh
# Script Version: 04
# Description: Repair NVIDIA DKMS for the current kernel; ensure canonical symlinks, depmod, initramfs, and optional module reload. Full log to /var/log/nvidia-repair.log
set -euo pipefail
# Set variables
# ========
# Everything printed by the script is appended to this file.
LOG_FILE=/var/log/nvidia-repair.log
# The repair always targets the kernel that is currently running.
KVER=$(uname -r)
# DKMS module name and version to rebuild. Both can be overridden from the
# environment (e.g. DKMS_V=560.35.03 ./nvidia-repair.sh) so that a driver
# update does not require editing this script; the defaults are unchanged.
DKMS_M="${DKMS_M:-nvidia-current}"
DKMS_V="${DKMS_V:-550.163.01}"
# Directory that is searched for the nvidia*.ko* files of this kernel.
UPD_DIR="/lib/modules/${KVER}/updates/dkms"
# Map Debian filenames -> canonical module names
# (key = file expected in UPD_DIR, value = name of the symlink pointing to it)
typeset -A MAP
MAP=(
  "nvidia-current.ko.xz" "nvidia.ko.xz"
  "nvidia-current-modeset.ko.xz" "nvidia-modeset.ko.xz"
  "nvidia-current-uvm.ko.xz" "nvidia-uvm.ko.xz"
  "nvidia-current-drm.ko.xz" "nvidia-drm.ko.xz"
  "nvidia-current-peermem.ko.xz" "nvidia-peermem.ko.xz"
)
# Knobs: export ISOLATE=0 / RELOAD=0 / UPDATE_INITRD=0 to tweak behavior
# (each defaults to 1 = enabled when unset or empty)
ISOLATE="${ISOLATE:-1}"
RELOAD="${RELOAD:-1}"
UPDATE_INITRD="${UPDATE_INITRD:-1}"
# Functions
# ========
# Print a message to stdout, prefixed with "[DEBUG]".
# Arguments: $* - message words (joined with spaces)
# -r keeps the text literal: without it, print expands backslash escapes
# (\n, \t, \c ...) that happen to occur inside the message.
log() { print -r -- "[DEBUG] $*"; }
# Print a message to stderr, prefixed with "[ERROR]", and exit with status 1.
# Arguments: $* - message words (joined with spaces)
# -r keeps the text literal: without it, print expands backslash escapes
# that happen to occur inside the message.
die() { print -r -- "[ERROR] $*" >&2; exit 1; }
# Exit handler: report whether the script succeeded and where the log is.
# Reads the exit status that is current when the handler starts, so it must
# be captured before any other command runs.
# Globals: LOG_FILE (read)
# Outputs: two log lines (verdict, log location)
cleanup() {
  local exit_code=$?
  local verdict
  if (( exit_code != 0 )); then
    verdict="FAIL: script exited with code ${exit_code}."
  else
    verdict="SUCCESS: NVIDIA DKMS repair finished."
  fi
  log "${verdict}"
  log "Full log: ${LOG_FILE}"
}
# Run cleanup whenever the shell exits, so the final status line is printed
# for normal completion as well as for aborts.
trap cleanup EXIT
# Logging (stdout+stderr to file and console)
# ========
# Make sure the log directory exists before tee opens the log file.
# NOTE(review): /root/scripts is created here but is not referenced in the
# visible part of this script - confirm whether it is still needed.
mkdir -p /root/scripts /var/log
# From here on stdout and stderr both go through tee, which appends to
# ${LOG_FILE} and also passes the text on to the original stdout.
exec > >(tee -a "${LOG_FILE}") 2>&1
# Trace every command; the trace is written to stderr, which was redirected
# above, so it ends up in the log as well.
set -x
# Main Process
# ========
# Optionally leave the graphical target first; controlled by ISOLATE.
# A failing isolate is tolerated and the script carries on regardless.
case "${ISOLATE}" in
  1)
    log "Switching to multi-user target (free modules) ..."
    systemctl isolate multi-user.target || true
    ;;
  *)
    log "ISOLATE=0 -> Skipping systemctl isolate"
    ;;
esac
# Rebuild the DKMS module for the running kernel only:
# headers -> remove old build -> build -> install.
log "Ensuring kernel headers for ${KVER} ..."
# apt-get instead of apt: apt's command-line interface is documented as not
# stable for use in scripts. A failure here aborts the script (errexit).
apt-get -y install "linux-headers-${KVER}"
log "Removing stale DKMS for this kernel (no --all) ..."
# Best effort: there may be nothing registered for this kernel yet.
dkms remove -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" || true
log "Building DKMS ${DKMS_M}/${DKMS_V} for ${KVER} ..."
# Build/install stay non-fatal as before, but a non-zero exit status is now
# reported in the log instead of being swallowed silently.
dkms build -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" \
  || log "WARN: dkms build exited with status $? (continuing)"
log "Installing DKMS ${DKMS_M}/${DKMS_V} for ${KVER} ..."
dkms install -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" \
  || log "WARN: dkms install exited with status $? (continuing)"
# Create the canonical nvidia*.ko.xz names as symlinks to the Debian
# "nvidia-current*" files, then verify that at least one usable module file
# is present in UPD_DIR.
log "Ensuring updates dir exists: ${UPD_DIR}"
mkdir -p "${UPD_DIR}"
log "Creating canonical symlinks -> Debian '...-current' files"
cd "${UPD_DIR}" || die "Cannot cd ${UPD_DIR}"
for SRC DST in "${(@kv)MAP}"; do
  if [[ -e "${SRC}" ]]; then
    ln -sf "${SRC}" "${DST}"
  elif [[ -L "${DST}" && ! -e "${DST}" ]]; then
    # A link left over from an earlier run whose target no longer exists:
    # remove it so it cannot be mistaken for an installed module.
    rm -f -- "${DST}"
  fi
done
log "Verifying presence of .ko files (real or symlink):"
# Collect only entries that actually resolve to a file. `ls -l` alone also
# succeeds on dangling symlinks, and the (N) qualifier makes an unmatched
# pattern expand to nothing, so the check no longer depends on how the shell
# reports a glob without matches.
NV_KO_FILES=()
for KO in "${UPD_DIR}"/nvidia*.ko*(N); do
  if [[ -e "${KO}" ]]; then
    NV_KO_FILES+=( "${KO}" )
  fi
done
(( ${#NV_KO_FILES[@]} > 0 )) || die "No nvidia*.ko files present under ${UPD_DIR}"
ls -l -- "${NV_KO_FILES[@]}"
# Refresh module metadata, persist the KMS option, regenerate the initramfs
# and (optionally) reload the freshly installed modules.
log "Refreshing module dependencies (depmod) ..."
depmod -a "${KVER}"
log "Ensuring KMS option for nvidia-drm ..."
# Written BEFORE the initramfs is regenerated, so that an initramfs which
# carries modprobe configuration picks up the current option file.
# Still best effort, but a failed write is now reported instead of ignored.
print -- 'options nvidia-drm modeset=1' > /etc/modprobe.d/nvidia-kms.conf \
  || log "WARN: could not write /etc/modprobe.d/nvidia-kms.conf (continuing)"
if [[ "${UPDATE_INITRD}" == "1" ]]; then
  log "Updating initramfs for ${KVER} ..."
  update-initramfs -u -k "${KVER}"
else
  log "UPDATE_INITRD=0 -> Skipping update-initramfs"
fi
if [[ "${RELOAD}" == "1" ]]; then
  log "Reloading NVIDIA modules cleanly ..."
  # Unload in reverse dependency order; failure is expected when the modules
  # are not loaded or still in use, hence best effort and silent.
  modprobe -r nvidia_drm nvidia_uvm nvidia_modeset nvidia 2>/dev/null || true
  # "modeset" is a parameter of nvidia-drm only, so it is no longer passed
  # to the core nvidia module.
  modprobe nvidia
  modprobe nvidia_modeset
  modprobe nvidia_uvm
  modprobe nvidia_drm modeset=1
else
  log "RELOAD=0 -> Skipping module reload"
fi
# Diagnostics only: every check below is allowed to fail without aborting.
log "Sanity: modinfo by NAME"
for M in nvidia nvidia_modeset nvidia_uvm nvidia_drm; do
  echo "[$M]"
  # Show which file each module name resolves to and its version.
  modinfo "$M" | grep -E '^(filename|version)' || true
done
log "Sanity: LOADED versions"
for M in nvidia nvidia_modeset nvidia_uvm nvidia_drm; do
  printf "[%s] " "$M"
  # The sysfs file cannot be read when the module is not loaded.
  cat "/sys/module/${M}/version" 2>/dev/null || echo "not loaded"
done
log "Loaded modules summary:"
# grep -E replaces the obsolescent egrep wrapper; the pattern is unchanged.
lsmod | grep -E 'nvidia(_uvm|_modeset|_drm)?\b' || true
# Stop command tracing before the closing message.
set +x
log "Done. If run from TTY, switch back with: systemctl isolate graphical.target"