From a04156520394e1362a43faedb43c1ab28cf580a1 Mon Sep 17 00:00:00 2001
From: oib
Date: Sat, 12 Apr 2025 09:56:11 +0200
Subject: [PATCH] Update 2025-04-12_09:56:11

---
 2mp3cudaRestoration.sh   |  54 +++++++++++
 calculate_bitrate.sh     |  46 +++++++++
 clean_local_bin.sh       |  57 +++++++++++
 convert2opus.sh          |  48 ++++++++++
 dns_health_check.sh      |  25 +++++
 fetch_transcript.sh      |  37 +++++++
 ffmpeg_video_compress.sh |  76 +++++++++++++++
 gitea_push.sh            | 201 +++++++++++++++++++++++++++++++++++++++
 gitea_push_debug.sh      |  46 +++++++++
 normalize.sh             |  63 ++++++++++++
 scraper.sh               | 189 ++++++++++++++++++++++++++++++++++++
 set_nvidia_fan.sh        |  12 +++
 soa_monitor.sh           |  47 +++++++++
 test_ns_zones.sh         |  41 ++++++++
 wrap_embeddings.sh       |  43 +++++++++
 15 files changed, 985 insertions(+)
 create mode 100755 2mp3cudaRestoration.sh
 create mode 100755 calculate_bitrate.sh
 create mode 100755 clean_local_bin.sh
 create mode 100755 convert2opus.sh
 create mode 100755 dns_health_check.sh
 create mode 100755 fetch_transcript.sh
 create mode 100755 ffmpeg_video_compress.sh
 create mode 100755 gitea_push.sh
 create mode 100755 gitea_push_debug.sh
 create mode 100755 normalize.sh
 create mode 100755 scraper.sh
 create mode 100755 set_nvidia_fan.sh
 create mode 100755 soa_monitor.sh
 create mode 100755 test_ns_zones.sh
 create mode 100755 wrap_embeddings.sh

diff --git a/2mp3cudaRestoration.sh b/2mp3cudaRestoration.sh
new file mode 100755
index 0000000..fec28b5
--- /dev/null
+++ b/2mp3cudaRestoration.sh
@@ -0,0 +1,54 @@
#!/bin/bash
# Script Version: 03
# Description: Simple script to convert audio files to MP3 (192 kbps) using CUDA for acceleration.

# Set variables
# ========
INPUT_DIR="$(pwd)"
OUTPUT_DIR="$INPUT_DIR/output"
BITRATE="192k"

# Functions
# ========
# Convert one audio file to MP3; non-zero return on ffmpeg failure.
convert_to_mp3() {
    local INPUT_FILE="$1"
    local OUTPUT_FILE="$2"

    ffmpeg -hwaccel cuda -i "$INPUT_FILE" -c:a libmp3lame -b:a "$BITRATE" "$OUTPUT_FILE" -y || return 1
}

# Main Process
# ========
echo "Starting audio conversion process..."
mkdir -p "$OUTPUT_DIR"

SUCCESS_COUNT=0
TOTAL_COUNT=0

shopt -s nullglob
for FILE in "$INPUT_DIR"/*; do
    if [[ -f "$FILE" ]]; then
        BASENAME=$(basename "$FILE")
        EXTENSION="${BASENAME##*.}"

        # Skip unsupported extensions
        if ! [[ "$EXTENSION" =~ ^(wav|flac|opus|m4a|mp3)$ ]]; then
            echo "Skipping unsupported file: $FILE"
            continue
        fi

        OUTPUT_FILE="$OUTPUT_DIR/${BASENAME%.*}.mp3"

        echo "Converting $FILE to $OUTPUT_FILE"
        if convert_to_mp3 "$FILE" "$OUTPUT_FILE"; then
            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
        fi
        TOTAL_COUNT=$((TOTAL_COUNT + 1))
    fi
done
shopt -u nullglob

echo "Audio conversion process completed.
Success: $SUCCESS_COUNT/$TOTAL_COUNT"

diff --git a/calculate_bitrate.sh b/calculate_bitrate.sh
new file mode 100755
index 0000000..6ca2ff7
--- /dev/null
+++ b/calculate_bitrate.sh
@@ -0,0 +1,46 @@
#!/bin/bash
# Description: Report per-file and average video bitrate (Mbps) for every file in a folder.

# NOTE(review): the usage placeholder was eaten by markup stripping;
# reconstructed as "<video_folder>" -- confirm against the pre-mangled source.
if [ -z "$1" ]; then
    echo "Usage: $0 <video_folder>"
    exit 1
fi

VIDEO_FOLDER="$1"
OUTPUT_FILE="bitrates.txt"
TOTAL_BITRATE=0
VIDEO_COUNT=0

# Clear or create the output file
> "$OUTPUT_FILE"

# Print the bitrate of one video in Mbps (ffprobe reports bits/s; prints 0 when unknown).
get_bitrate() {
    local video_file="$1"
    local bitrate_bps
    bitrate_bps=$(ffprobe -v error -select_streams v:0 -show_entries stream=bit_rate -of default=noprint_wrappers=1:nokey=1 "$video_file" | head -n 1)
    if [[ "$bitrate_bps" =~ ^[0-9]+$ ]]; then
        echo "scale=2; $bitrate_bps / 1000 / 1000" | bc
    else
        echo "0"
    fi
}

# Iterate through each video file in the folder
for video_file in "$VIDEO_FOLDER"/*; do
    if [ -f "$video_file" ]; then
        bitrate=$(get_bitrate "$video_file")
        echo "File: $video_file - Bitrate: ${bitrate} Mbps" | tee -a "$OUTPUT_FILE"
        TOTAL_BITRATE=$(echo "$TOTAL_BITRATE + $bitrate" | bc)
        ((VIDEO_COUNT++))
    fi
done

# Calculate the average bitrate
if [ "$VIDEO_COUNT" -gt 0 ]; then
    AVERAGE_BITRATE=$(echo "scale=2; $TOTAL_BITRATE / $VIDEO_COUNT" | bc)
    echo "Average Bitrate: $AVERAGE_BITRATE Mbps" | tee -a "$OUTPUT_FILE"
else
    echo "No video files found in the specified folder." | tee -a "$OUTPUT_FILE"
fi

diff --git a/clean_local_bin.sh b/clean_local_bin.sh
new file mode 100755
index 0000000..0639a38
--- /dev/null
+++ b/clean_local_bin.sh
@@ -0,0 +1,57 @@
#!/bin/bash
# Script Version: 0.1
# Description: Interactive cleanup for stale Python tool CLI files in ~/.local/bin (at2 workstation)

# Set variables
# ========
TARGET_DIR="$HOME/.local/bin"
LOG_FILE="$HOME/.clean_local_bin.log"

# Functions
# ========

# List suspected legacy tools
list_legacy_tools() {
    find "$TARGET_DIR" -maxdepth 1 -type f \( \
        -name 'futurize' -o -name 'pasteurize' -o -name 'pkginfo' -o -name 'pybabel' \
        -o -name 'pygmentize' -o -name 'webassets' -o -name 'wheel' \
        -o -name 'mutagen-*' -o -name 'mid3*' -o -name 'moggsplit' \
        -o -name 'filetype' -o -name 'normalizer' -o -name 'markdown*' \
        -o -name 'jsonschema' -o -name 'httpx' -o -name 'openai' \
        -o -name 'unidecode' -o -name 'netaddr' -o -name 'flask' \
        -o -name 'pyserial-*' -o -name 'psg*' \
        \)
}

# Main Process
# ========
echo "[DEBUG] Scanning $TARGET_DIR for workshop leftovers..."

list_legacy_tools > /tmp/.local_bin_candidates.txt

if [[ ! -s /tmp/.local_bin_candidates.txt ]]; then
    echo "[DEBUG] Nothing found to delete."
    exit 0
fi

echo "[DEBUG] Found the following candidates:"
cat /tmp/.local_bin_candidates.txt

echo "[DEBUG] Proceed with deletion? (y/n)"
# FIX: use read -r so a backslash in input is not interpreted
read -r CONFIRM
if [[ "$CONFIRM" != "y" ]]; then
    echo "[DEBUG] Aborted by user"
    exit 1
fi

echo "[DEBUG] Deleting files and logging to $LOG_FILE"
while read -r FILE; do
    echo "[DEBUG] Removing $FILE"
    echo "$(date) [DELETED] $FILE" >> "$LOG_FILE"
    rm -v "$FILE"
done < /tmp/.local_bin_candidates.txt

echo "[DEBUG] Cleanup done."
# EOF

diff --git a/convert2opus.sh b/convert2opus.sh
new file mode 100755
index 0000000..4b041be
--- /dev/null
+++ b/convert2opus.sh
@@ -0,0 +1,48 @@
#!/bin/bash
# Description: Convert all top-level .wav files to .opus in parallel,
# deleting each .wav on successful conversion.

# Number of concurrent ffmpeg processes
N=${1:-4}

# Create a semaphore with N slots on fd 3.
# NOTE(review): the slot-filling loop was destroyed by markup stripping; it is
# reconstructed here as the conventional "write N tokens" pattern -- confirm.
semaphore=$(mktemp)
exec 3<>"$semaphore"
for ((i = 0; i < N; i++)); do
    echo >&3
done

# Timestamped debug logging
debug_log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}

# Run ffmpeg for one file under semaphore locking; delete the .wav on success.
run_ffmpeg() {
    local file=$1
    local outfile="${file/%.wav/.opus}"

    # Wait for a slot to become available (consume one token from fd 3)
    read -r -u 3
    debug_log "Starting conversion: $file"

    if ffmpeg -i "$file" -y -c:a libopus -vbr on -compression_level 5 -ar 48000 -application audio "$outfile"; then
        debug_log "Completed conversion: $file"
        debug_log "Deleting original file: $file"
        rm "$file"
    else
        debug_log "Failed conversion: $file"
    fi

    # Release the slot
    echo >&3
}

export -f run_ffmpeg
export -f debug_log

# Find all .wav files and convert them in parallel, respecting the semaphore.
find . -maxdepth 1 -type f -name '*.wav' -print0 | xargs -0 -I {} -P "$N" bash -c 'run_ffmpeg "$@"' _ {}

# Cleanup
exec 3>&-
rm -f "$semaphore"

diff --git a/dns_health_check.sh b/dns_health_check.sh
new file mode 100755
index 0000000..05345eb
--- /dev/null
+++ b/dns_health_check.sh
@@ -0,0 +1,25 @@
#!/bin/bash
# Script Version: 02
# Description: Periodically verify consistency between ns1, ns2, and Google nameserver DNS records for a specific domain.
+ +# Set Variables +# ======== +NS1="23.88.113.138" +NS2="116.202.112.180" +GOOGLE_NS="8.8.8.8" +DOMAIN="es1.dynproxy.net" +LOG_FILE="/var/log/dns_health_check.log" + +# Main Process +# ======== +IP_NS1=$(dig @$NS1 $DOMAIN A +short) +IP_NS2=$(dig @$NS2 $DOMAIN A +short) +IP_GOOGLE=$(dig @$GOOGLE_NS $DOMAIN A +short) + +if [ "$IP_NS1" == "$IP_NS2" ] && [ "$IP_NS1" == "$IP_GOOGLE" ]; then + echo "[$(date)] DNS records are consistent across all nameservers: $IP_NS1" >> "$LOG_FILE" +else + echo "[$(date)] DNS inconsistency detected!" >> "$LOG_FILE" + echo "[$(date)] ns1: $IP_NS1, ns2: $IP_NS2, Google: $IP_GOOGLE" >> "$LOG_FILE" +fi + diff --git a/fetch_transcript.sh b/fetch_transcript.sh new file mode 100755 index 0000000..2a6566e --- /dev/null +++ b/fetch_transcript.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Check if URL is provided +if [ -z "$1" ]; then + echo "Usage: $0 " + exit 1 +fi + +URL=$1 +VIDEO_ID=$(echo $URL | grep -o 'v=[^&]*' | cut -d '=' -f 2) +if [ -z "$VIDEO_ID" ]; then + VIDEO_ID=$(echo $URL | grep -o '[^/]*$') +fi +OUTPUT_FILE="${VIDEO_ID}.txt" + +# Create a temporary Node.js script +cat << EOF > fetch_transcript.js +const { YoutubeTranscript } = require('youtube-transcript'); +const fs = require('fs'); + +YoutubeTranscript.fetchTranscript('$VIDEO_ID') + .then(transcript => { + const transcriptText = transcript.map(item => item.text).join('\\n'); + fs.writeFileSync('$OUTPUT_FILE', transcriptText); + console.log('Transcript saved to $OUTPUT_FILE'); + }) + .catch(err => { + console.error('Error fetching transcript:', err); + }); +EOF + +# Run the Node.js script +node fetch_transcript.js + +# Clean up +rm fetch_transcript.js + diff --git a/ffmpeg_video_compress.sh b/ffmpeg_video_compress.sh new file mode 100755 index 0000000..5a4e80c --- /dev/null +++ b/ffmpeg_video_compress.sh @@ -0,0 +1,76 @@ +#!/bin/zsh +# Script Version: 14 +# Description: Compress a video using ffmpeg with NVIDIA CUDA for acceleration, aiming for a smaller file size. 
Push GPU usage and CPU multicore hyperthreading to maximize performance by parallel processing. + +# Set variables +# ======== +INPUT_VIDEO="$1" +OUTPUT_VIDEO="${INPUT_VIDEO%.*}_compressed.mkv" # Output filename based on input with '_compressed' suffix and .mkv extension +TEMP_DIR="/tmp/ffmpeg_chunks" +CHUNK_DURATION=30 # Split video into 30-second chunks for parallel processing +NUM_CHUNKS=4 # Limit to 4 chunks + +# Main Process +# ======== +if [[ -z "$INPUT_VIDEO" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Check if GNU Parallel is installed +if ! command -v parallel &> /dev/null; then + echo "GNU Parallel is required but not installed. Please install it with: apt-get install parallel" + exit 1 +fi + +# Create a temporary directory for storing chunks +mkdir -p "$TEMP_DIR" + +# Split the input video into smaller chunks, ensuring proper timestamps and avoiding timestamp issues +ffmpeg -fflags +genpts -copyts -i "$INPUT_VIDEO" -c copy -map 0 -segment_time "$CHUNK_DURATION" -reset_timestamps 1 -f segment "$TEMP_DIR/chunk_%03d.mkv" + +# Verify if splitting succeeded +if [[ $? -ne 0 ]]; then + echo "Error: Failed to split the video into chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Limit the number of chunks to 4 +CHUNKS=$(ls "$TEMP_DIR"/chunk_*.mkv | head -n "$NUM_CHUNKS") + +# Compress each chunk in parallel using GNU Parallel +echo "$CHUNKS" | parallel -j "$NUM_CHUNKS" ffmpeg -hwaccel cuda -i {} -c:v hevc_nvenc -preset p1 -rc constqp -qp 20 -b:v 5M -maxrate 10M -bufsize 20M -c:a copy {.}_compressed.mkv + +# Verify if compression succeeded +if [[ $? -ne 0 ]]; then + echo "Error: Compression failed for one or more chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Concatenate the compressed chunks into the final output file +ls "$TEMP_DIR"/*_compressed.mkv | sort | xargs -I {} echo "file '{}'" > "$TEMP_DIR/file_list.txt" +ffmpeg -f concat -safe 0 -i "$TEMP_DIR/file_list.txt" -c copy "$OUTPUT_VIDEO" + +# Verify if concatenation succeeded +if [[ $? 
-ne 0 ]]; then + echo "Error: Failed to concatenate the compressed chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Clean up temporary files +rm -rf "$TEMP_DIR" + +# Output status +if [[ -f "$OUTPUT_VIDEO" ]]; then + echo "Compression complete. Output file: $OUTPUT_VIDEO" +else + echo "Compression failed. Output file was not created." + exit 1 +fi + +# Display file sizes +ls -lh "$INPUT_VIDEO" "$OUTPUT_VIDEO" | awk '{print $9, $5}' + diff --git a/gitea_push.sh b/gitea_push.sh new file mode 100755 index 0000000..2b78169 --- /dev/null +++ b/gitea_push.sh @@ -0,0 +1,201 @@ +#!/bin/zsh +# Script Version: 1.3 +# Description: Pushes the current folder (e.g. /etc) to a nested Gitea repo using provided nesting arguments. Auto-creates the remote repo via Gitea API if missing. + +# Set variables +# ======== + +# Try to extract GITEA_API_TOKEN from ~/.netrc if present +if [ -z "$GITEA_API_TOKEN" ] && grep -q '^GITEA_API_TOKEN=' ~/.netrc 2>/dev/null; then + GITEA_API_TOKEN=$(grep '^GITEA_API_TOKEN=' ~/.netrc | head -n1 | cut -d= -f2 | xargs) + export GITEA_API_TOKEN +fi +GITEA_USER="oib" +GITEA_URL="https://gitea.bubuit.net" +GITEA_API_URL="$GITEA_URL/api/v1" +PRIVATE=false +DEBUG=false +COMMIT_MESSAGE="Update $(date +%F_%T)" + +# Logging function +# ======== +log() { + local level="$1"; shift + if [ "$level" = "DEBUG" ] && [ "$DEBUG" != true ]; then return; fi + local color_reset="$(tput sgr0)" + local color="" + case "$level" in + INFO) color="$(tput setaf 2)" ;; # green + WARNING) color="$(tput setaf 3)" ;; # yellow + ERROR) color="$(tput setaf 1)" ;; # red + DEBUG) color="$(tput setaf 4)" ;; # blue + esac + echo "${color}[$level] $*${color_reset}" +} + +# Functions +# ======== +create_repo() { + log INFO "Repository does not exist. 
Creating via API: $REMOTE_PATH"
    log DEBUG "POST $GITEA_API_URL/user/repos with name=$REMOTE_PATH and private=$PRIVATE"
    RESPONSE=$(curl -s -X POST \
        -H "Authorization: token $GITEA_API_TOKEN" \
        -H "Content-Type: application/json" \
        -d "{\"name\": \"$FOLDER_NAME\", \"private\": $PRIVATE}" \
        "$GITEA_API_URL/user/repos")

    if echo "$RESPONSE" | grep -q '"clone_url"'; then
        log INFO "Remote repository created successfully."
    else
        log ERROR "Failed to create remote repository: $RESPONSE"
        exit 1
    fi
}

# Stage everything and commit (initial commit if HEAD does not exist yet).
prepare_commit() {
    git add .
    if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
        log INFO "Creating initial commit"
        git commit -m "$COMMIT_MESSAGE"
    else
        log INFO "Committing changes"
        git commit -m "$COMMIT_MESSAGE" || log INFO "Nothing to commit"
    fi
}

# Point 'origin' at $GIT_REMOTE, adding or updating as needed.
setup_remote() {
    if git remote | grep -q '^origin$'; then
        log INFO "Updating remote origin URL"
        git remote set-url origin "$GIT_REMOTE"
    else
        log INFO "Adding remote origin"
        git remote add origin "$GIT_REMOTE"
    fi
}

push_changes() {
    log INFO "Pushing to $GIT_REMOTE"
    git push -u origin main
}

# Show help if no arguments are given
# ========
if [ $# -eq 0 ]; then
    # NOTE(review): the "<token>" placeholders below were eaten by markup stripping; reconstructed.
    echo "GITEA_API_TOKEN=<token>"
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host-group>"
    echo "Example: $0 server"
    echo "         $0 --private workstation"
    echo "         $0 --debug server"
    echo "         $0 --message \"minor update\" server"
    echo
    echo "Note: You must cd into the target folder before running this script."
    echo "For example:"
    echo "  cd /etc && $0 server"
    echo
    echo "Authentication:"
    echo "  Git uses ~/.netrc for authentication. You can create it like this:"
    echo "  echo \"machine \$(echo \"$GITEA_URL\" | sed 's|https\\?://||') login $GITEA_USER password <token>\" > ~/.netrc"
    echo "  chmod 600 ~/.netrc"
    exit 0
fi

# Parse arguments
# ========
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --private)
            PRIVATE=true
            shift
            ;;
        --debug)
            DEBUG=true
            shift
            ;;
        --message)
            COMMIT_MESSAGE="$2"
            shift 2
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done

set -- "${POSITIONAL_ARGS[@]}"

if [ $# -ne 1 ]; then
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host-group>"
    exit 1
fi

HOST_GROUP=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
# FIX: the original assigned HOST_NAME twice with the identical command; once is enough.
HOST_NAME=$(hostname -s | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
FOLDER_NAME="${HOST_NAME}-$(basename "$PWD")"
REPO_PATH="$PWD"
REMOTE_PATH="$FOLDER_NAME"
GIT_REMOTE="https://gitea.bubuit.net/$GITEA_USER/$FOLDER_NAME.git"

# Git authentication hint
# export GIT_ASKPASS=true # disabled: does not affect authentication without handler
log DEBUG "Ensure ~/.netrc has:
machine gitea.bubuit.net login $GITEA_USER password <token>"

# FIX: default HTTP_STATUS so the numeric tests below do not error when the
# API token is missing and the curl probe never runs.
HTTP_STATUS=0

# Check if GITEA_API_TOKEN is set
if [ -z "$GITEA_API_TOKEN" ]; then
    log WARNING "GITEA_API_TOKEN is not set. Skipping API repo creation."
else
    # Check if remote repo exists
    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
        -H "Authorization: token $GITEA_API_TOKEN" \
        "$GITEA_API_URL/repos/$GITEA_USER/$FOLDER_NAME")

    if [ "$HTTP_STATUS" -ne 200 ]; then
        create_repo
    else
        log INFO "Remote repository already exists."
    fi
fi

# Main Process
# ========

# Safety check against pushing from / or $HOME
if [[ "$PWD" == "$HOME" || "$PWD" == "/" ]]; then
    log ERROR "Refusing to run inside \$PWD=$PWD"
    exit 1
fi
log INFO "Pushing $REPO_PATH to $GIT_REMOTE"
cd "$REPO_PATH" || { log ERROR "Directory $REPO_PATH not found"; exit 1; }

# Initialize git if needed
# Branch is fixed to 'main' for simplicity and consistency
if [ ! -d .git ]; then
    log INFO "Initializing Git repo"
    git init
    git config init.defaultBranch main
    git checkout -b main
else
    log DEBUG ".git directory already present"
fi

# Ensure at least one commit exists
prepare_commit

# Set or update remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    setup_remote
else
    log WARNING "Skipping remote setup – repository does not exist."
fi

# Push to remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    push_changes
else
    log WARNING "Skipping push – repository does not exist."
fi

diff --git a/gitea_push_debug.sh b/gitea_push_debug.sh
new file mode 100755
index 0000000..12e4f39
--- /dev/null
+++ b/gitea_push_debug.sh
@@ -0,0 +1,46 @@
#!/bin/zsh
# Script Version: 1.0 Debug
# Description: Extended debug version of Gitea push script for diagnosing issues.
# 1) Basic variables
GITEA_USER="oib"
GITEA_URL="https://gitea.bubuit.net"
GITEA_API_URL="$GITEA_URL/api/v1"

# 2) Debug function
log_debug() {
    echo "[DEBUG] $*"
}

log_debug "== Starting gitea_push_debug.sh =="

# 3) Show environment
log_debug "Home Dir: $HOME"
log_debug "PWD: $PWD"
log_debug "User: $USER"

# 4) Check GITEA_API_TOKEN from environment
if [ -z "$GITEA_API_TOKEN" ]; then
    log_debug "GITEA_API_TOKEN is not set in environment"
else
    log_debug "GITEA_API_TOKEN is present, length: ${#GITEA_API_TOKEN}"
fi

# 5) Attempt to read from ~/.gitea_token
if [ -f "$HOME/.gitea_token" ]; then
    TOKEN_FILE_CONTENT=$(cat "$HOME/.gitea_token")
    log_debug "~/.gitea_token found, length: ${#TOKEN_FILE_CONTENT}"
else
    log_debug "~/.gitea_token not found"
fi

# 6) Try an API request to /user with the token from environment
if [ -n "$GITEA_API_TOKEN" ]; then
    USER_RESPONSE=$(curl -s -H "Authorization: token $GITEA_API_TOKEN" "$GITEA_API_URL/user")
    log_debug "Response from /user: $USER_RESPONSE"
else
    log_debug "Skipping /user request; no valid GITEA_API_TOKEN in environment."
fi

log_debug "== End gitea_push_debug.sh =="

diff --git a/normalize.sh b/normalize.sh
new file mode 100755
index 0000000..1068ddb
--- /dev/null
+++ b/normalize.sh
@@ -0,0 +1,63 @@
#!/bin/bash
# Description: Loudness-normalize every top-level .m4a into a .wav, N jobs at a time.

# Check for ffmpeg dependency
if ! command -v ffmpeg &> /dev/null; then
    echo "ffmpeg could not be found. Please install ffmpeg to use this script."
    exit 1
fi

# Number of concurrent ffmpeg processes
N=${1:-8}

# Lock file used by flock on fd 9
semaphore=/tmp/ffmpeg.lock
exec 9>"$semaphore"

# Run one normalization pass under the lock.
# NOTE(review): flock on a single fd serializes the ffmpeg runs; the N-way
# parallelism is actually bounded by the jobs/wait loop below, not by this lock.
run_ffmpeg() {
    flock -x 9

    ffmpeg -y -i "$1" -af "loudnorm=I=-16:LRA=11:measured_I=-20:measured_LRA=16,volume=0.8" "$2"
    if [ $? -ne 0 ]; then
        echo "An error occurred with ffmpeg processing $1"
        # Release the semaphore slot on error as well
        flock -u 9
        return 1
    fi

    flock -u 9
}

# Create the semaphore file if it does not exist
touch "$semaphore"
if [ ! -f "$semaphore" ]; then
    echo "Failed to create semaphore file."
    exit 1
fi

# Process each .m4a file (FIX: the original comment said ".opus" but the glob is .m4a)
find . -maxdepth 1 -type f -name '*.m4a' | while read -r file; do
    wav_file="${file/%.m4a/.wav}"
    if [ ! -f "$wav_file" ]; then
        echo "Processing $file..."
        run_ffmpeg "$file" "$wav_file" &

        # Ensure at most N parallel ffmpeg processes
        while [ "$(jobs -p | wc -l)" -ge "$N" ]; do
            wait -n
        done
    fi
done

# Wait for all background jobs to finish
wait

# Close the semaphore file descriptor and remove the file
exec 9>&-
rm -f "$semaphore"

diff --git a/scraper.sh b/scraper.sh
new file mode 100755
index 0000000..c9bd32e
--- /dev/null
+++ b/scraper.sh
@@ -0,0 +1,189 @@
#!/bin/bash
# Script Version: 01.8
# Description: Scrapes and extracts page text from MediaWiki pages, cleans image artifacts, and deletes empty results

# Constants
DOWNLOAD_TIMEOUT=10
TEXT_FILE_SUFFIX=".txt"
LINK_FILE_SUFFIX=".txt"

# Convert a relative URL to an absolute URL against base_url.
resolve_url() {
    local base_url=$1
    local relative_url=$2

    if [[ "$relative_url" =~ ^https?:// ]]; then
        echo "$relative_url"
    elif [[ "$relative_url" =~ ^/ ]]; then
        echo "${base_url}${relative_url}"
    else
        echo "${base_url}/${relative_url}"
    fi
}

# Return 0 (skip) for known irrelevant MediaWiki URLs.
should_skip_url() {
    local url=$1
    case "$url" in
        *"load.php"*|*"IE9fixes.css"*|*"favicon.ico"*|*"opensearch_desc.php"*|*"api.php?action="*|*"Special:RecentChanges"*|*"Special:UserLogin"*|*"Special:RequestAccount"*|*"Dioxipedia:Privacy_policy"*|*"javascript:print();"*|*"mediawiki.org"*)
            return 0 ;; # true, should skip
        *)
            return 1 ;; # false, don't skip
    esac
}

# Function to download content into a variable
with timeout and error checking +download_content() { + local url=$1 + local exclude_file=$2 + + if should_skip_url "$url"; then + echo "Skipping known irrelevant URL: $url" + return 1 + fi + + if [ -f "$exclude_file" ] && grep -Fx "$url" "$exclude_file" > /dev/null; then + echo "Skipping excluded URL: $url" + return 1 + fi + + echo "Downloading: $url" + SITECACHE=$(wget -T "$DOWNLOAD_TIMEOUT" -q -O - "$url" 2>/dev/null) + if [ $? -ne 0 ] || [ -z "$SITECACHE" ]; then + echo -e "\033[31m[ ERROR ]:\033[0m Failed to download $url" >&2 + echo "$url" >> "$exclude_file" + return 1 + fi + if ! echo "$SITECACHE" | grep -q "> "$exclude_file" + return 1 + fi + sleep 1 + echo "Successfully downloaded: $url" + return 0 +} + +# Improved extraction function using pup and lynx +extract_text() { + local output_file=$1 + local url=$2 + local exclude_file=$3 + + echo "Extracting text from SITECACHE to $output_file" + + EXTRACTED=$(echo "$SITECACHE" | pup '#mw-content-text' 2>/dev/null) + + if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then + echo "INFO: Content empty with #mw-content-text, trying #bodyContent" + EXTRACTED=$(echo "$SITECACHE" | pup '#bodyContent' 2>/dev/null) + fi + + if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then + echo "WARNING: Still no content after both selectors." + echo "$url" >> "$exclude_file" + return 1 + fi + + echo "$EXTRACTED" | lynx -stdin -dump -nolist > "$output_file" + + if [ ! 
-s "$output_file" ]; then + echo "WARNING: No text extracted from $url after lynx" + echo "$url" >> "$exclude_file" + rm -f "$output_file" + return 1 + fi + + # Remove lines containing image artifacts like [something.jpg] or [something.png] + sed -i '/\[.*\(jpg\|jpeg\|png\).*]/Id' "$output_file" + + # Delete if file is smaller than 100 bytes + if [ $(stat -c%s "$output_file") -lt 100 ]; then + echo "INFO: Deleted $output_file (under 100 bytes)" + rm -f "$output_file" + echo "$url" >> "$exclude_file" + return 1 + fi + + echo "Successfully extracted text to $output_file" + return 0 +} + +# Function to extract page title +extract_title() { + echo "$SITECACHE" | grep -oP '(?<=).*(?=)' | head -n 1 | sed 's/ - dioxipedia$//' | sed 's/[^a-zA-Z0-9-]/_/g' | sed 's/__*/_/g' | sed 's/^_//;s/_$//' +} + +# Function to extract links +extract_links() { + local output_file=$1 + + echo "$SITECACHE" | grep -oP '(?<=href=")[^"]+' | grep -v 'translate\.goog' > "$output_file" + if [ $? -ne 0 ] || [ ! -s "$output_file" ]; then + echo "WARNING: No links extracted" + rm -f "$output_file" + return 1 + fi + echo "Successfully extracted links to $output_file" + return 0 +} + +# Main script logic +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +INITIAL_URL=$1 +DOMAIN=$(echo "$INITIAL_URL" | awk -F[/:] '{print $4}') +BASE_URL="https://$DOMAIN" +ALL_PAGES_URL="$BASE_URL/index.php?title=Special:AllPages" +LINKSFILE="$DOMAIN/links$LINK_FILE_SUFFIX" +EXCLUDE_FILE="$DOMAIN/exclude.txt" +CONTENT_DIR="$DOMAIN/content" + +mkdir -p "$DOMAIN" +mkdir -p "$CONTENT_DIR" + +# Step 1: Collect links +if ! download_content "$ALL_PAGES_URL" "$EXCLUDE_FILE"; then + echo "Failed to download $ALL_PAGES_URL" + exit 1 +fi + +if ! 
extract_links "$LINKSFILE"; then + echo "Failed to extract links" + exit 1 +fi + +# Step 2: Process links +PROCESSED_URLS=() +while IFS= read -r link; do + URL=$(resolve_url "$BASE_URL" "$link") + + if [[ " ${PROCESSED_URLS[*]} " =~ " $URL " ]]; then + echo "Skipping processed URL: $URL" + continue + fi + + if ! download_content "$URL" "$EXCLUDE_FILE"; then + PROCESSED_URLS+=("$URL") + continue + fi + + PAGENAME=$(extract_title) + [ -z "$PAGENAME" ] && PAGENAME="page" + + TEXTFILE="$CONTENT_DIR/$PAGENAME$TEXT_FILE_SUFFIX" + + if ! extract_text "$TEXTFILE" "$URL" "$EXCLUDE_FILE"; then + PROCESSED_URLS+=("$URL") + continue + fi + + PROCESSED_URLS+=("$URL") +done < "$LINKSFILE" + +echo "Processing complete." +exit 0 + diff --git a/set_nvidia_fan.sh b/set_nvidia_fan.sh new file mode 100755 index 0000000..bee3d25 --- /dev/null +++ b/set_nvidia_fan.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Script Version: 03 +# Description: Set NVIDIA GPU fan speed on startup. + +# Define the display and X authority paths +export DISPLAY=:1 +export XAUTHORITY=/run/user/1000/gdm/Xauthority + +# Run nvidia-settings commands as root +sudo /usr/bin/nvidia-settings -c :1 -a "[gpu:0]/GPUFanControlState=1" +sudo /usr/bin/nvidia-settings -c :1 -a "[fan:0]/GPUTargetFanSpeed=10" + diff --git a/soa_monitor.sh b/soa_monitor.sh new file mode 100755 index 0000000..0c23f8f --- /dev/null +++ b/soa_monitor.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Script Version: 07 +# Description: Monitor SOA for dynproxy.net and play a sound when it changes to ns1.dynproxy.net. + +# Set variables +# ======== +DOMAIN="dynproxy.net" +EXPECTED_NS="ns1.dynproxy.net." +SOUND_CMD="paplay /usr/share/sounds/freedesktop/stereo/alarm-clock-elapsed.oga" +CHECK_INTERVAL=60 # Interval in seconds + +# Functions +# ======== +get_soa() { + dig "$DOMAIN" SOA +short | awk '{print $1}' +} + +play_sound() { + $SOUND_CMD +} + +# Main Process +# ======== +echo "Monitoring SOA for $DOMAIN. 
Expected NS: $EXPECTED_NS"
LAST_SOA=""

while true; do
    CURRENT_SOA=$(get_soa)

    if [[ -z "$CURRENT_SOA" ]]; then
        echo "Error fetching SOA record. Network issue or domain unreachable."
        sleep "$CHECK_INTERVAL"
        continue
    fi

    if [[ "$CURRENT_SOA" != "$LAST_SOA" ]]; then
        echo "SOA changed! New SOA: $CURRENT_SOA"
        LAST_SOA="$CURRENT_SOA"

        if [[ "$CURRENT_SOA" == "$EXPECTED_NS" ]]; then
            echo "SOA matches expected NS. Playing sound..."
            play_sound
        fi
    fi
    sleep "$CHECK_INTERVAL"
done

diff --git a/test_ns_zones.sh b/test_ns_zones.sh
new file mode 100755
index 0000000..1d61bdf
--- /dev/null
+++ b/test_ns_zones.sh
@@ -0,0 +1,41 @@
#!/bin/zsh
# Version 01.0
# Script to test DNS zone propagation across ns1, ns2, and ns3
# Script Name: test_ns_zones.sh

# Variables
NS1="23.88.113.138"
NS2="116.202.112.180"
NS3="95.216.198.140"

# NOTE(review): usage placeholder lost to markup stripping; presumably "<domain>".
if [ -z "$1" ]; then
    echo "Usage: $0 <domain>"
    exit 1
fi

DOMAIN=$1

# Query SOA, MX, and the mail A record for DOMAIN against one nameserver.
function test_ns {
    local NS=$1
    echo ""
    echo "=== Testing $DOMAIN on $NS ==="
    dig @"$NS" "$DOMAIN" SOA +short
    echo ""
    echo "MX Record:"
    dig @"$NS" "$DOMAIN" MX +short
    echo ""
    echo "A Record for mail.$DOMAIN:"
    dig @"$NS" "mail.$DOMAIN" A +short
    echo ""
}

# Test each nameserver
test_ns "$NS1"
test_ns "$NS2"
test_ns "$NS3"

# Success message
echo "DNS zone test completed for $DOMAIN"

diff --git a/wrap_embeddings.sh b/wrap_embeddings.sh
new file mode 100755
index 0000000..a905c93
--- /dev/null
+++ b/wrap_embeddings.sh
@@ -0,0 +1,43 @@
#!/bin/bash
# Script Version: 0.3
# Description: Convert each .txt in content/ to .json with embedding in json/

# Set variables
CONTENT_DIR="./content"
JSON_DIR="./json"
EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"

# Check dependencies
if ! python3 -c "import sentence_transformers" 2>/dev/null; then
    echo "[ERROR] ❌ sentence-transformers not installed. Run: pip3 install sentence-transformers"
    exit 1
fi

# Check input files
mkdir -p "$JSON_DIR"
if [ ! -d "$CONTENT_DIR" ] || ! ls "$CONTENT_DIR"/*.txt >/dev/null 2>&1; then
    echo "[ERROR] ❌ No .txt files found in $CONTENT_DIR"
    exit 1
fi

# Generate embeddings: one {id, text, embedding} JSON per .txt file
python3 -c "
import sys, json, os
from sentence_transformers import SentenceTransformer
content_dir, json_dir = sys.argv[1], sys.argv[2]
model = SentenceTransformer('${EMBEDDING_MODEL}')
for txt_file in os.listdir(content_dir):
    if txt_file.endswith('.txt'):
        base_name = txt_file[:-4]
        try:
            with open(os.path.join(content_dir, txt_file), 'r', encoding='utf-8') as f:
                text = f.read()
            embedding = model.encode([text])[0].tolist()
            with open(os.path.join(json_dir, f'{base_name}.json'), 'w') as f:
                json.dump({'id': base_name, 'text': text, 'embedding': embedding}, f)
            print(f'[DEBUG] ✅ Saved: {json_dir}/{base_name}.json')
        except Exception as e:
            print(f'[ERROR] ❌ Failed: {txt_file} - {str(e)}', file=sys.stderr)
" "$CONTENT_DIR" "$JSON_DIR" 2>&1 | while read -r line; do echo "$line"; done

echo "✅ All .txt files converted to JSON with embeddings in $JSON_DIR"