diff --git a/nvidia-repair.sh b/nvidia-repair.sh new file mode 100755 index 0000000..1c499a2 --- /dev/null +++ b/nvidia-repair.sh @@ -0,0 +1,121 @@ +#!/bin/zsh +# Script Version: 04 +# Description: Repair NVIDIA DKMS for the current kernel; ensure canonical symlinks, depmod, initramfs, and optional module reload. Full log to /var/log/nvidia-repair.log + +set -euo pipefail + +# Set variables +# ======== +LOG_FILE=/var/log/nvidia-repair.log +KVER=$(uname -r) +DKMS_M="nvidia-current" +DKMS_V="550.163.01" +UPD_DIR="/lib/modules/${KVER}/updates/dkms" + +# Map Debian filenames -> canonical module names +typeset -A MAP +MAP=( + "nvidia-current.ko.xz" "nvidia.ko.xz" + "nvidia-current-modeset.ko.xz" "nvidia-modeset.ko.xz" + "nvidia-current-uvm.ko.xz" "nvidia-uvm.ko.xz" + "nvidia-current-drm.ko.xz" "nvidia-drm.ko.xz" + "nvidia-current-peermem.ko.xz" "nvidia-peermem.ko.xz" +) + +# Knobs: export ISOLATE=0 / RELOAD=0 / UPDATE_INITRD=0 to tweak behavior +ISOLATE="${ISOLATE:-1}" +RELOAD="${RELOAD:-1}" +UPDATE_INITRD="${UPDATE_INITRD:-1}" + +# Functions +# ======== +log() { print -- "[DEBUG] $*"; } +die() { print -- "[ERROR] $*" >&2; exit 1; } +cleanup() { + local RC=$? + if [[ $RC -eq 0 ]]; then log "SUCCESS: NVIDIA DKMS repair finished." + else log "FAIL: script exited with code ${RC}."; fi + log "Full log: ${LOG_FILE}" +} +trap cleanup EXIT + +# Logging (stdout+stderr to file and console) +# ======== +mkdir -p /root/scripts /var/log +exec > >(tee -a "${LOG_FILE}") 2>&1 +set -x + +# Main Process +# ======== +if [[ "${ISOLATE}" == "1" ]]; then + log "Switching to multi-user target (free modules) ..." + systemctl isolate multi-user.target || true +else + log "ISOLATE=0 -> Skipping systemctl isolate" +fi + +log "Ensuring kernel headers for ${KVER} ..." +apt -y install "linux-headers-${KVER}" + +log "Removing stale DKMS for this kernel (no --all) ..." +dkms remove -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" || true + +log "Building DKMS ${DKMS_M}/${DKMS_V} for ${KVER} ..." +dkms build -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" || true + +log "Installing DKMS ${DKMS_M}/${DKMS_V} for ${KVER} ..." +dkms install -m "${DKMS_M}" -v "${DKMS_V}" -k "${KVER}" || true + +log "Ensuring updates dir exists: ${UPD_DIR}" +mkdir -p "${UPD_DIR}" + +log "Creating canonical symlinks -> Debian '...-current' files" +cd "${UPD_DIR}" || die "Cannot cd ${UPD_DIR}" +for SRC DST in "${(@kv)MAP}"; do + if [[ -e "${SRC}" ]]; then + ln -sf "${SRC}" "${DST}" + fi +done + +log "Verifying presence of .ko files (real or symlink):" +ls -l ${UPD_DIR}/nvidia*.ko* || die "No nvidia*.ko files present under ${UPD_DIR}" + +log "Refreshing module dependencies (depmod) ..." +depmod -a "${KVER}" + +if [[ "${UPDATE_INITRD}" == "1" ]]; then + log "Updating initramfs for ${KVER} ..." + update-initramfs -u -k "${KVER}" +else + log "UPDATE_INITRD=0 -> Skipping update-initramfs" +fi + +log "Ensuring KMS option for nvidia-drm ..." +print -- 'options nvidia-drm modeset=1' > /etc/modprobe.d/nvidia-kms.conf || true + +if [[ "${RELOAD}" == "1" ]]; then + log "Reloading NVIDIA modules cleanly ..." + modprobe -r nvidia_drm nvidia_uvm nvidia_modeset nvidia 2>/dev/null || true + modprobe nvidia modeset=1 + modprobe nvidia_modeset + modprobe nvidia_uvm + modprobe nvidia_drm modeset=1 +else + log "RELOAD=0 -> Skipping module reload" +fi + +log "Sanity: modinfo by NAME" +for M in nvidia nvidia_modeset nvidia_uvm nvidia_drm; do + echo "[$M]"; modinfo "$M" | grep -E '^(filename|version)' || true +done + +log "Sanity: LOADED versions" +for M in nvidia nvidia_modeset nvidia_uvm nvidia_drm; do + printf "[%s] " "$M"; cat /sys/module/$M/version 2>/dev/null || echo "not loaded" +done + +log "Loaded modules summary:" +lsmod | egrep 'nvidia(_uvm|_modeset|_drm)?\b' || true + +set +x +log "Done. If run from TTY, switch back with: systemctl isolate graphical.target"