Update 2025-04-12_09:56:11

This commit is contained in:
oib
2025-04-12 09:56:11 +02:00
commit a041565203
15 changed files with 985 additions and 0 deletions

54
2mp3cudaRestoration.sh Executable file
View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Script Version: 04
# Description: Convert audio files in the current directory to MP3 (192 kbps) using ffmpeg.
# NOTE(review): -hwaccel cuda only accelerates video decoding; libmp3lame audio
# encoding is CPU-bound, so the flag is effectively a no-op for pure audio inputs.
# Set variables
# ========
INPUT_DIR="$(pwd)"
OUTPUT_DIR="$INPUT_DIR/output"
BITRATE="192k"
# Functions
# ========
# convert_to_mp3 <input> <output>
# Encode <input> to MP3 at $BITRATE; returns non-zero on ffmpeg failure.
convert_to_mp3() {
local INPUT_FILE="$1"
local OUTPUT_FILE="$2"
# -y must precede the output file: placed after it (as before) it is a
# trailing option that ffmpeg ignores with a warning, so existing outputs
# triggered an interactive overwrite prompt. -nostdin keeps ffmpeg from
# consuming the script's stdin.
ffmpeg -nostdin -y -hwaccel cuda -i "$INPUT_FILE" -c:a libmp3lame -b:a "$BITRATE" "$OUTPUT_FILE" || return 1
}
# Main Process
# ========
echo "Starting audio conversion process..."
mkdir -p "$OUTPUT_DIR"
SUCCESS_COUNT=0
TOTAL_COUNT=0
shopt -s nullglob
for FILE in "$INPUT_DIR"/*; do
if [[ -f "$FILE" ]]; then
BASENAME=$(basename "$FILE")
EXTENSION="${BASENAME##*.}"
# Skip unsupported extensions
if ! [[ "$EXTENSION" =~ ^(wav|flac|opus|m4a|mp3)$ ]]; then
echo "Skipping unsupported file: $FILE"
continue
fi
OUTPUT_FILE="$OUTPUT_DIR/${BASENAME%.*}.mp3"
echo "Converting $FILE to $OUTPUT_FILE"
if convert_to_mp3 "$FILE" "$OUTPUT_FILE"; then
SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
else
echo "Conversion failed: $FILE" >&2
fi
TOTAL_COUNT=$((TOTAL_COUNT + 1))
fi
done
shopt -u nullglob
echo "Audio conversion process completed. Success: $SUCCESS_COUNT/$TOTAL_COUNT"

46
calculate_bitrate.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
# Description: Report the bitrate (in Mbps) of every video in a folder, plus the
# average, logging results to bitrates.txt.
# Check if the folder is provided as an argument
if [ -z "$1" ]; then
echo "Usage: $0 <folder_with_videos>"
exit 1
fi
VIDEO_FOLDER="$1"
OUTPUT_FILE="bitrates.txt"
TOTAL_BITRATE=0
VIDEO_COUNT=0
# Clear or create the output file
> "$OUTPUT_FILE"
# Function to get bitrate of a video in Mbps.
# ffprobe's stream=bit_rate is reported in bits per second (the previous
# variable name "bitrate_kbps" was misleading), hence the two /1000 divisions.
get_bitrate() {
local video_file="$1"
local bitrate_bps
bitrate_bps=$(ffprobe -v error -select_streams v:0 -show_entries stream=bit_rate -of default=noprint_wrappers=1:nokey=1 "$video_file" | head -n 1)
if [[ "$bitrate_bps" =~ ^[0-9]+$ ]]; then
echo "scale=2; $bitrate_bps / 1000 / 1000" | bc
else
# ffprobe printed "N/A" or nothing — count as 0
echo "0"
fi
}
# Iterate through each video file in the folder
for video_file in "$VIDEO_FOLDER"/*; do
if [ -f "$video_file" ]; then
bitrate=$(get_bitrate "$video_file")
echo "File: $video_file - Bitrate: ${bitrate} Mbps" | tee -a "$OUTPUT_FILE"
TOTAL_BITRATE=$(echo "$TOTAL_BITRATE + $bitrate" | bc)
# Plain arithmetic assignment: ((VIDEO_COUNT++)) returns 1 when the
# pre-increment value is 0, which would trip a future `set -e`.
VIDEO_COUNT=$((VIDEO_COUNT + 1))
fi
done
# Calculate the average bitrate
if [ "$VIDEO_COUNT" -gt 0 ]; then
AVERAGE_BITRATE=$(echo "scale=2; $TOTAL_BITRATE / $VIDEO_COUNT" | bc)
echo "Average Bitrate: $AVERAGE_BITRATE Mbps" | tee -a "$OUTPUT_FILE"
else
echo "No video files found in the specified folder." | tee -a "$OUTPUT_FILE"
fi

57
clean_local_bin.sh Executable file
View File

@@ -0,0 +1,57 @@
#!/bin/bash
# Script Version: 0.2
# Description: Interactive cleanup for stale Python tool CLI files in ~/.local/bin (at2 workstation)
# Set variables
# ========
TARGET_DIR="$HOME/.local/bin"
LOG_FILE="$HOME/.clean_local_bin.log"
# Candidate list goes to a private temp file; the previous fixed
# /tmp/.local_bin_candidates.txt path was predictable and shared across
# users and runs.
CANDIDATES=$(mktemp) || exit 1
trap 'rm -f "$CANDIDATES"' EXIT
# Functions
# ========
# List suspected legacy tools
list_legacy_tools() {
find "$TARGET_DIR" -maxdepth 1 -type f \( \
-name 'futurize' -o -name 'pasteurize' -o -name 'pkginfo' -o -name 'pybabel' \
-o -name 'pygmentize' -o -name 'webassets' -o -name 'wheel' \
-o -name 'mutagen-*' -o -name 'mid3*' -o -name 'moggsplit' \
-o -name 'filetype' -o -name 'normalizer' -o -name 'markdown*' \
-o -name 'jsonschema' -o -name 'httpx' -o -name 'openai' \
-o -name 'unidecode' -o -name 'netaddr' -o -name 'flask' \
-o -name 'pyserial-*' -o -name 'psg*' \
\)
}
# Main Process
# ========
echo "[DEBUG] Scanning $TARGET_DIR for workshop leftovers..."
list_legacy_tools > "$CANDIDATES"
if [[ ! -s "$CANDIDATES" ]]; then
echo "[DEBUG] Nothing found to delete."
exit 0
fi
echo "[DEBUG] Found the following candidates:"
cat "$CANDIDATES"
echo "[DEBUG] Proceed with deletion? (y/n)"
# -r: do not let backslashes in the answer be interpreted
read -r CONFIRM
if [[ "$CONFIRM" != "y" ]]; then
echo "[DEBUG] Aborted by user"
exit 1
fi
echo "[DEBUG] Deleting files and logging to $LOG_FILE"
while read -r FILE; do
echo "[DEBUG] Removing $FILE"
echo "$(date) [DELETED] $FILE" >> "$LOG_FILE"
# -- protects against file names that begin with a dash
rm -v -- "$FILE"
done < "$CANDIDATES"
echo "[DEBUG] Cleanup done."
# EOF

48
convert2opus.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Description: Convert all .wav files in the current directory to .opus in
# parallel (at most N at a time), deleting each .wav on successful conversion.
# Number of concurrent ffmpeg processes
N=${1:-4}
# Debugging function
debug_log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}
# Convert one file and delete the source .wav on success.
# Concurrency is bounded solely by `xargs -P` below: the previous
# regular-file "semaphore" on fd 3 could never block (a read on a plain
# file at EOF returns immediately instead of waiting), so it never limited
# anything and has been removed.
run_ffmpeg() {
local file=$1
local outfile="${file/%.wav/.opus}"
debug_log "Starting conversion: $file"
if ffmpeg -nostdin -i "$file" -y -c:a libopus -vbr on -compression_level 5 -ar 48000 -application audio "$outfile"; then
debug_log "Completed conversion: $file"
debug_log "Deleting original file: $file"
rm -- "$file"
else
debug_log "Failed conversion: $file"
fi
}
export -f run_ffmpeg
export -f debug_log
# Find all .wav files and convert them to .opus in parallel (max N jobs),
# deleting each .wav on success
find . -maxdepth 1 -type f -name '*.wav' -print0 | xargs -0 -I {} -P "$N" bash -c 'run_ffmpeg "$@"' _ {}

25
dns_health_check.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Script Version: 03
# Description: Periodically verify consistency between ns1, ns2, and Google nameserver DNS records for a specific domain.
# Set Variables
# ========
NS1="23.88.113.138"
NS2="116.202.112.180"
GOOGLE_NS="8.8.8.8"
DOMAIN="es1.dynproxy.net"
LOG_FILE="/var/log/dns_health_check.log"
# Main Process
# ========
IP_NS1=$(dig @"$NS1" "$DOMAIN" A +short)
IP_NS2=$(dig @"$NS2" "$DOMAIN" A +short)
IP_GOOGLE=$(dig @"$GOOGLE_NS" "$DOMAIN" A +short)
# Previously three empty answers compared equal and were logged as
# "consistent"; treat a fully empty result set as a lookup failure instead.
if [ -z "$IP_NS1" ] && [ -z "$IP_NS2" ] && [ -z "$IP_GOOGLE" ]; then
echo "[$(date)] DNS lookup failed on all nameservers for $DOMAIN" >> "$LOG_FILE"
elif [ "$IP_NS1" == "$IP_NS2" ] && [ "$IP_NS1" == "$IP_GOOGLE" ]; then
echo "[$(date)] DNS records are consistent across all nameservers: $IP_NS1" >> "$LOG_FILE"
else
echo "[$(date)] DNS inconsistency detected!" >> "$LOG_FILE"
echo "[$(date)] ns1: $IP_NS1, ns2: $IP_NS2, Google: $IP_GOOGLE" >> "$LOG_FILE"
fi

37
fetch_transcript.sh Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Description: Fetch a YouTube video transcript via the node 'youtube-transcript'
# package and save it to <video_id>.txt.
# Check if URL is provided
if [ -z "$1" ]; then
echo "Usage: $0 <youtube_video_url>"
exit 1
fi
URL=$1
# Extract the v=... query parameter; fall back to the last path segment
# (youtu.be-style links)
VIDEO_ID=$(printf '%s' "$URL" | grep -o 'v=[^&]*' | cut -d '=' -f 2)
if [ -z "$VIDEO_ID" ]; then
VIDEO_ID=$(printf '%s' "$URL" | grep -o '[^/]*$')
fi
OUTPUT_FILE="${VIDEO_ID}.txt"
# Create a temporary Node.js script. mktemp avoids clobbering a real
# fetch_transcript.js in the current directory, and the trap guarantees
# cleanup on every exit path.
JS_FILE=$(mktemp --suffix=.js) || exit 1
trap 'rm -f "$JS_FILE"' EXIT
cat << EOF > "$JS_FILE"
const { YoutubeTranscript } = require('youtube-transcript');
const fs = require('fs');
YoutubeTranscript.fetchTranscript('$VIDEO_ID')
.then(transcript => {
const transcriptText = transcript.map(item => item.text).join('\\n');
fs.writeFileSync('$OUTPUT_FILE', transcriptText);
console.log('Transcript saved to $OUTPUT_FILE');
})
.catch(err => {
console.error('Error fetching transcript:', err);
});
EOF
# Run the Node.js script
node "$JS_FILE"

76
ffmpeg_video_compress.sh Executable file
View File

@@ -0,0 +1,76 @@
#!/bin/zsh
# Script Version: 15
# Description: Compress a video using ffmpeg with NVIDIA CUDA for acceleration.
# The input is split into 30-second chunks which are compressed in parallel
# (NUM_JOBS at a time) and then re-concatenated.
# Set variables
# ========
INPUT_VIDEO="$1"
OUTPUT_VIDEO="${INPUT_VIDEO%.*}_compressed.mkv" # Output filename based on input with '_compressed' suffix and .mkv extension
TEMP_DIR="/tmp/ffmpeg_chunks"
CHUNK_DURATION=30 # Split video into 30-second chunks for parallel processing
NUM_JOBS=4 # Number of chunks compressed concurrently
# Main Process
# ========
if [[ -z "$INPUT_VIDEO" ]]; then
echo "Usage: $0 <input_video>"
exit 1
fi
# Check if GNU Parallel is installed
if ! command -v parallel &> /dev/null; then
echo "GNU Parallel is required but not installed. Please install it with: apt-get install parallel"
exit 1
fi
# Create a temporary directory for storing chunks
mkdir -p "$TEMP_DIR"
# Split the input video into smaller chunks, ensuring proper timestamps and avoiding timestamp issues
if ! ffmpeg -fflags +genpts -copyts -i "$INPUT_VIDEO" -c copy -map 0 -segment_time "$CHUNK_DURATION" -reset_timestamps 1 -f segment "$TEMP_DIR/chunk_%03d.mkv"; then
echo "Error: Failed to split the video into chunks."
rm -rf "$TEMP_DIR"
exit 1
fi
# Compress ALL chunks, NUM_JOBS at a time. The previous version compressed
# only the first 4 chunks and concatenated just those, silently truncating
# any video longer than 4 * CHUNK_DURATION seconds.
if ! ls "$TEMP_DIR"/chunk_*.mkv | sort | parallel -j "$NUM_JOBS" ffmpeg -hwaccel cuda -i {} -c:v hevc_nvenc -preset p1 -rc constqp -qp 20 -b:v 5M -maxrate 10M -bufsize 20M -c:a copy {.}_compressed.mkv; then
echo "Error: Compression failed for one or more chunks."
rm -rf "$TEMP_DIR"
exit 1
fi
# Concatenate the compressed chunks into the final output file
ls "$TEMP_DIR"/*_compressed.mkv | sort | xargs -I {} echo "file '{}'" > "$TEMP_DIR/file_list.txt"
if ! ffmpeg -f concat -safe 0 -i "$TEMP_DIR/file_list.txt" -c copy "$OUTPUT_VIDEO"; then
echo "Error: Failed to concatenate the compressed chunks."
rm -rf "$TEMP_DIR"
exit 1
fi
# Clean up temporary files
rm -rf "$TEMP_DIR"
# Output status
if [[ -f "$OUTPUT_VIDEO" ]]; then
echo "Compression complete. Output file: $OUTPUT_VIDEO"
else
echo "Compression failed. Output file was not created."
exit 1
fi
# Display file sizes
ls -lh "$INPUT_VIDEO" "$OUTPUT_VIDEO" | awk '{print $9, $5}'

201
gitea_push.sh Executable file
View File

@@ -0,0 +1,201 @@
#!/bin/zsh
# Script Version: 1.3
# Description: Pushes the current folder (e.g. /etc) to a nested Gitea repo using provided nesting arguments. Auto-creates the remote repo via Gitea API if missing.
# Set variables
# ========
# Try to extract GITEA_API_TOKEN from ~/.netrc if present
# NOTE(review): this expects a non-standard "GITEA_API_TOKEN=..." line inside
# ~/.netrc; conventional netrc files only hold "machine/login/password"
# entries — confirm this convention is actually used on the target hosts.
if [ -z "$GITEA_API_TOKEN" ] && grep -q '^GITEA_API_TOKEN=' ~/.netrc 2>/dev/null; then
GITEA_API_TOKEN=$(grep '^GITEA_API_TOKEN=' ~/.netrc | head -n1 | cut -d= -f2 | xargs)
export GITEA_API_TOKEN
fi
GITEA_USER="oib"
GITEA_URL="https://gitea.bubuit.net"
GITEA_API_URL="$GITEA_URL/api/v1"
PRIVATE=false  # set true by --private: created repo is private
DEBUG=false    # set true by --debug: enables DEBUG-level log output
COMMIT_MESSAGE="Update $(date +%F_%T)"  # overridable via --message
# Logging function
# ========
# log LEVEL MESSAGE... — print a colourised "[LEVEL] message" line.
# DEBUG-level messages are emitted only when $DEBUG is true.
log() {
local level="$1"
shift
# Suppress debug chatter unless --debug was given
if [ "$level" = "DEBUG" ] && [ "$DEBUG" != true ]; then
return
fi
local reset
reset="$(tput sgr0)"
local colour=""
case "$level" in
INFO) colour="$(tput setaf 2)" ;;     # green
WARNING) colour="$(tput setaf 3)" ;;  # yellow
ERROR) colour="$(tput setaf 1)" ;;    # red
DEBUG) colour="$(tput setaf 4)" ;;    # blue
esac
echo "${colour}[$level] $*${reset}"
}
# Functions
# ========
# Create the remote repository via the Gitea API.
# Globals read: GITEA_API_TOKEN, GITEA_API_URL, FOLDER_NAME, PRIVATE, REMOTE_PATH.
# Exits 1 when the API response does not contain a clone_url.
# NOTE(review): the log lines mention $REMOTE_PATH while the API payload uses
# $FOLDER_NAME — they hold the same value today, but keep them in sync.
create_repo() {
log INFO "Repository does not exist. Creating via API: $REMOTE_PATH"
log DEBUG "POST $GITEA_API_URL/user/repos with name=$REMOTE_PATH and private=$PRIVATE"
RESPONSE=$(curl -s -X POST \
-H "Authorization: token $GITEA_API_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"name\": \"$FOLDER_NAME\", \"private\": $PRIVATE}" \
"$GITEA_API_URL/user/repos")
# Success detection is string-based: the created-repo JSON contains clone_url
if echo "$RESPONSE" | grep -q '"clone_url"'; then
log INFO "Remote repository created successfully."
else
log ERROR "Failed to create remote repository: $RESPONSE"
exit 1
fi
}
# Stage everything and commit. Creates the initial commit when HEAD does not
# exist yet; otherwise commits normally, tolerating an empty index.
prepare_commit() {
git add .
if git rev-parse --verify HEAD >/dev/null 2>&1; then
log INFO "Committing changes"
git commit -m "$COMMIT_MESSAGE" || log INFO "Nothing to commit"
else
log INFO "Creating initial commit"
git commit -m "$COMMIT_MESSAGE"
fi
}
# Point the "origin" remote at $GIT_REMOTE, adding it if it does not exist yet.
setup_remote() {
local action="add"
if git remote | grep -qx 'origin'; then
action="set-url"
log INFO "Updating remote origin URL"
else
log INFO "Adding remote origin"
fi
git remote "$action" origin "$GIT_REMOTE"
}
# Push the current branch to origin and set it as upstream.
# The branch name is fixed to 'main' (see the init section below).
push_changes() {
log INFO "Pushing to $GIT_REMOTE"
git push -u origin main
}
# Show help if no arguments are given
# ========
if [ $# -eq 0 ]; then
echo "GITEA_API_TOKEN=<your token>"
echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host_group>"
echo "Example: $0 server"
echo " $0 --private workstation"
echo " $0 --debug server"
echo " $0 --message \"minor update\" server"
echo
echo "Note: You must cd into the target folder before running this script."
echo "For example:"
echo " cd /etc && $0 server"
echo
echo "Authentication:"
echo " Git uses ~/.netrc for authentication. You can create it like this:"
echo " echo \"machine \$(echo \"$GITEA_URL\" | sed 's|https\\?://||') login $GITEA_USER password \"<your Git token or app password>\"\" > ~/.netrc"
echo " chmod 600 ~/.netrc"
exit 0
fi
# Parse arguments
# ========
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
case "$1" in
--private)
PRIVATE=true
shift
;;
--debug)
DEBUG=true
shift
;;
--message)
COMMIT_MESSAGE="$2"
shift 2
;;
*)
POSITIONAL_ARGS+=("$1")
shift
;;
esac
done
set -- "${POSITIONAL_ARGS[@]}"
if [ $# -ne 1 ]; then
echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host_group>"
exit 1
fi
HOST_GROUP=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
# NOTE(review): HOST_GROUP is validated but never used below — the repo name
# is derived from hostname + folder only. Confirm whether it should be part
# of FOLDER_NAME. (A duplicated HOST_NAME assignment was also removed here.)
HOST_NAME=$(hostname -s | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
FOLDER_NAME="${HOST_NAME}-$(basename "$PWD")"
REPO_PATH="$PWD"
REMOTE_PATH="$FOLDER_NAME"
GIT_REMOTE="https://gitea.bubuit.net/$GITEA_USER/$FOLDER_NAME.git"
# Git authentication hint
# export GIT_ASKPASS=true # disabled: does not affect authentication without handler
log DEBUG "Ensure ~/.netrc has:
machine gitea.bubuit.net login $GITEA_USER password <personal access token>"
# Check if GITEA_API_TOKEN is set.
# HTTP_STATUS defaults to 0 so the numeric comparisons below never operate on
# an unset/empty value when the token is missing.
HTTP_STATUS=0
if [ -z "$GITEA_API_TOKEN" ]; then
log WARNING "GITEA_API_TOKEN is not set. Skipping API repo creation."
else
# Check if remote repo exists
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
-H "Authorization: token $GITEA_API_TOKEN" \
"$GITEA_API_URL/repos/$GITEA_USER/$FOLDER_NAME")
if [ "$HTTP_STATUS" -ne 200 ]; then
# create_repo exits on failure, so reaching the next line means the repo
# now exists; mark it as such so the push below is not skipped (the old
# version skipped the very first push after auto-creation).
create_repo
HTTP_STATUS=200
else
log INFO "Remote repository already exists."
fi
fi
# Main Process
# ========
# Safety check against pushing from / or $HOME
if [[ "$PWD" == "$HOME" || "$PWD" == "/" ]]; then
log ERROR "Refusing to run inside \$PWD=$PWD"
exit 1
fi
log INFO "Pushing $REPO_PATH to $GIT_REMOTE"
cd "$REPO_PATH" || { log ERROR "Directory $REPO_PATH not found"; exit 1; }
# Initialize git if needed
# Branch is fixed to 'main' for simplicity and consistency
if [ ! -d .git ]; then
log INFO "Initializing Git repo"
git init
git config init.defaultBranch main
git checkout -b main
else
log DEBUG ".git directory already present"
fi
# Ensure at least one commit exists
prepare_commit
# Set or update remote
if [ "$HTTP_STATUS" -eq 200 ]; then
setup_remote
else
log WARNING "Skipping remote setup repository does not exist."
fi
# Push to remote
if [ "$HTTP_STATUS" -eq 200 ]; then
push_changes
else
log WARNING "Skipping push repository does not exist."
fi

46
gitea_push_debug.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/zsh
# Script Version: 1.0 Debug
# Description: Extended debug version of Gitea push script for diagnosing issues.
# 1) Basic variables
GITEA_USER="oib"
GITEA_URL="https://gitea.bubuit.net"
GITEA_API_URL="$GITEA_URL/api/v1"
# 2) Debug function: prefix every message with [DEBUG]
log_debug() {
echo "[DEBUG] $*"
}
log_debug "== Starting gitea_push_debug.sh =="
# 3) Show environment
log_debug "Home Dir: $HOME"
log_debug "PWD: $PWD"
log_debug "User: $USER"
# 4) Check GITEA_API_TOKEN from environment (report length only, never the value)
if [ -n "$GITEA_API_TOKEN" ]; then
log_debug "GITEA_API_TOKEN is present, length: ${#GITEA_API_TOKEN}"
else
log_debug "GITEA_API_TOKEN is not set in environment"
fi
# 5) Attempt to read from ~/.gitea_token
if [ -f "$HOME/.gitea_token" ]; then
TOKEN_FILE_CONTENT=$(cat "$HOME/.gitea_token")
log_debug "~/.gitea_token found, length: ${#TOKEN_FILE_CONTENT}"
else
log_debug "~/.gitea_token not found"
fi
# 6) Try an API request to /user with the token from environment
if [ -n "$GITEA_API_TOKEN" ]; then
USER_RESPONSE=$(curl -s -H "Authorization: token $GITEA_API_TOKEN" "$GITEA_API_URL/user")
log_debug "Response from /user: $USER_RESPONSE"
else
log_debug "Skipping /user request; no valid GITEA_API_TOKEN in environment."
fi
log_debug "== End gitea_push_debug.sh =="

63
normalize.sh Executable file
View File

@@ -0,0 +1,63 @@
#!/bin/bash
# Description: Loudness-normalize every .m4a in the current directory to .wav,
# running up to N ffmpeg processes in parallel.
# Check for ffmpeg dependency
if ! command -v ffmpeg &> /dev/null; then
echo "ffmpeg could not be found. Please install ffmpeg to use this script."
exit 1
fi
# Number of concurrent ffmpeg processes
N=${1:-8}
# Run one normalization pass. Concurrency is bounded by the jobs loop below.
# The previous flock-on-fd-9 "semaphore" was a no-op: every background job
# inherited the same open file description, and flock locks belong to the
# description, so the exclusive lock was always immediately granted.
run_ffmpeg() {
if ! ffmpeg -y -i "$1" -af "loudnorm=I=-16:LRA=11:measured_I=-20:measured_LRA=16,volume=0.8" "$2"; then
echo "An error occurred with ffmpeg processing $1"
return 1
fi
}
# Process each .m4a file (the old comment said .opus; the glob is .m4a).
# Process substitution instead of `find | while`: the pipe ran the loop in a
# subshell, so the final `wait` never waited for the background ffmpeg jobs.
while read -r file; do
wav_file="${file/%.m4a/.wav}"
if [ ! -f "$wav_file" ]; then
echo "Processing $file..."
run_ffmpeg "$file" "$wav_file" &
# Keep at most N ffmpeg jobs in flight
while [ "$(jobs -p | wc -l)" -ge "$N" ]; do
wait -n
done
fi
done < <(find . -maxdepth 1 -type f -name '*.m4a')
# Wait for all background jobs to finish
wait

189
scraper.sh Executable file
View File

@@ -0,0 +1,189 @@
#!/bin/bash
# Script Version: 01.8
# Description: Scrapes and extracts page text from MediaWiki pages, cleans image artifacts, and deletes empty results
# Constants
DOWNLOAD_TIMEOUT=10  # wget timeout (seconds) per page
TEXT_FILE_SUFFIX=".txt"  # suffix for extracted page-text files
LINK_FILE_SUFFIX=".txt"  # suffix for the collected-links file
# Function to convert relative URLs to absolute URLs:
# - absolute http(s) URLs pass through unchanged
# - root-relative URLs ("/path") are joined directly to the base
# - anything else is joined with a "/" separator
resolve_url() {
local base=$1
local ref=$2
case "$ref" in
http://*|https://*) echo "$ref" ;;
/*) echo "${base}${ref}" ;;
*) echo "${base}/${ref}" ;;
esac
}
# Function to check if URL should be skipped.
# Returns 0 (skip) for maintenance/asset/login pages, 1 otherwise.
should_skip_url() {
local url=$1
local pattern
for pattern in "load.php" "IE9fixes.css" "favicon.ico" "opensearch_desc.php" \
"api.php?action=" "Special:RecentChanges" "Special:UserLogin" \
"Special:RequestAccount" "Dioxipedia:Privacy_policy" "javascript:print();" \
"mediawiki.org"; do
if [[ "$url" == *"$pattern"* ]]; then
return 0 # true, should skip
fi
done
return 1 # false, don't skip
}
# Function to download content into a variable with timeout and error checking.
# Fills the SITECACHE global with the page body; returns 1 (and records the
# URL in $2) on any failure so callers can skip it on future runs.
download_content() {
local url=$1
local exclude_file=$2
# Hard-coded skip list (assets, login pages, ...)
if should_skip_url "$url"; then
echo "Skipping known irrelevant URL: $url"
return 1
fi
# Skip URLs that failed on a previous run (exact whole-line match)
if [ -f "$exclude_file" ] && grep -Fx "$url" "$exclude_file" > /dev/null; then
echo "Skipping excluded URL: $url"
return 1
fi
echo "Downloading: $url"
SITECACHE=$(wget -T "$DOWNLOAD_TIMEOUT" -q -O - "$url" 2>/dev/null)
if [ $? -ne 0 ] || [ -z "$SITECACHE" ]; then
echo -e "\033[31m[ ERROR ]:\033[0m Failed to download $url" >&2
echo "$url" >> "$exclude_file"
return 1
fi
# Sanity check that the payload is HTML.
# NOTE(review): match is case-sensitive — pages emitting "<HTML" would be
# rejected; confirm whether that ever occurs for this wiki.
if ! echo "$SITECACHE" | grep -q "<html"; then
echo "Skipping: $url (not HTML)"
echo "$url" >> "$exclude_file"
return 1
fi
# Be polite to the server between requests
sleep 1
echo "Successfully downloaded: $url"
return 0
}
# Improved extraction function using pup and lynx.
# Renders the cached page (SITECACHE global) to plain text in $1.
# Tries the MediaWiki content selector first, then the wider #bodyContent.
# On any failure the URL ($2) is appended to the exclude file ($3) and the
# partial output is removed. Requires the external tools `pup` and `lynx`.
extract_text() {
local output_file=$1
local url=$2
local exclude_file=$3
echo "Extracting text from SITECACHE to $output_file"
EXTRACTED=$(echo "$SITECACHE" | pup '#mw-content-text' 2>/dev/null)
# The sed strips blank lines so whitespace-only output counts as empty
if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then
echo "INFO: Content empty with #mw-content-text, trying #bodyContent"
EXTRACTED=$(echo "$SITECACHE" | pup '#bodyContent' 2>/dev/null)
fi
if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then
echo "WARNING: Still no content after both selectors."
echo "$url" >> "$exclude_file"
return 1
fi
# lynx renders the extracted HTML fragment to plain text, links dropped
echo "$EXTRACTED" | lynx -stdin -dump -nolist > "$output_file"
if [ ! -s "$output_file" ]; then
echo "WARNING: No text extracted from $url after lynx"
echo "$url" >> "$exclude_file"
rm -f "$output_file"
return 1
fi
# Remove lines containing image artifacts like [something.jpg] or [something.png]
sed -i '/\[.*\(jpg\|jpeg\|png\).*]/Id' "$output_file"
# Delete if file is smaller than 100 bytes (near-empty page)
if [ $(stat -c%s "$output_file") -lt 100 ]; then
echo "INFO: Deleted $output_file (under 100 bytes)"
rm -f "$output_file"
echo "$url" >> "$exclude_file"
return 1
fi
echo "Successfully extracted text to $output_file"
return 0
}
# Derive a filesystem-safe page name from the cached page's <title>:
# strip the " - dioxipedia" suffix, replace unsafe characters with "_",
# collapse runs of "_" and trim leading/trailing "_".
extract_title() {
local title
title=$(echo "$SITECACHE" | grep -oP '(?<=<title>).*(?=</title>)' | head -n 1)
echo "$title" | sed -e 's/ - dioxipedia$//' -e 's/[^a-zA-Z0-9-]/_/g' -e 's/__*/_/g' -e 's/^_//' -e 's/_$//'
}
# Pull every href target out of the cached page (SITECACHE global) into $1,
# one per line, dropping Google-Translate proxy links. Returns 1 and removes
# the file when nothing was extracted.
extract_links() {
local outfile=$1
echo "$SITECACHE" | grep -oP '(?<=href=")[^"]+' | grep -v 'translate\.goog' > "$outfile"
if [ ! -s "$outfile" ]; then
echo "WARNING: No links extracted"
rm -f "$outfile"
return 1
fi
echo "Successfully extracted links to $outfile"
return 0
}
# Main script logic
# ========
if [ $# -ne 1 ]; then
echo "Usage: $0 <URL>" >&2
exit 1
fi
INITIAL_URL=$1
# Field 4 of protocol://host/... split on '/' and ':' is the hostname
DOMAIN=$(echo "$INITIAL_URL" | awk -F[/:] '{print $4}')
BASE_URL="https://$DOMAIN"
ALL_PAGES_URL="$BASE_URL/index.php?title=Special:AllPages"
LINKSFILE="$DOMAIN/links$LINK_FILE_SUFFIX"
EXCLUDE_FILE="$DOMAIN/exclude.txt"
CONTENT_DIR="$DOMAIN/content"
mkdir -p "$DOMAIN"
mkdir -p "$CONTENT_DIR"
# Step 1: Collect links
if ! download_content "$ALL_PAGES_URL" "$EXCLUDE_FILE"; then
echo "Failed to download $ALL_PAGES_URL"
exit 1
fi
if ! extract_links "$LINKSFILE"; then
echo "Failed to extract links"
exit 1
fi
# Step 2: Process links.
# Dedupe with an associative array — the previous linear scan over a
# PROCESSED_URLS array made the crawl O(n^2) in the number of links.
declare -A SEEN_URLS=()
while IFS= read -r link; do
URL=$(resolve_url "$BASE_URL" "$link")
if [[ -n "${SEEN_URLS[$URL]+x}" ]]; then
echo "Skipping processed URL: $URL"
continue
fi
SEEN_URLS[$URL]=1
if ! download_content "$URL" "$EXCLUDE_FILE"; then
continue
fi
PAGENAME=$(extract_title)
[ -z "$PAGENAME" ] && PAGENAME="page"
TEXTFILE="$CONTENT_DIR/$PAGENAME$TEXT_FILE_SUFFIX"
extract_text "$TEXTFILE" "$URL" "$EXCLUDE_FILE"
done < "$LINKSFILE"
echo "Processing complete."
exit 0

12
set_nvidia_fan.sh Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Script Version: 03
# Description: Set NVIDIA GPU fan speed on startup.
# Define the display and X authority paths
# (nvidia-settings needs a running X server; :1 is the GDM session here —
# NOTE(review): confirm the display number and the UID-1000 path on each host)
export DISPLAY=:1
export XAUTHORITY=/run/user/1000/gdm/Xauthority
# Run nvidia-settings commands as root:
# enable manual fan control on gpu:0, then pin fan:0 at 10%
sudo /usr/bin/nvidia-settings -c :1 -a "[gpu:0]/GPUFanControlState=1"
sudo /usr/bin/nvidia-settings -c :1 -a "[fan:0]/GPUTargetFanSpeed=10"

47
soa_monitor.sh Executable file
View File

@@ -0,0 +1,47 @@
#!/bin/bash
# Script Version: 07
# Description: Monitor SOA for dynproxy.net and play a sound when it changes to ns1.dynproxy.net.
# Set variables
# ========
DOMAIN="dynproxy.net"
EXPECTED_NS="ns1.dynproxy.net."
SOUND_CMD="paplay /usr/share/sounds/freedesktop/stereo/alarm-clock-elapsed.oga"
CHECK_INTERVAL=60 # Interval in seconds
# Functions
# ========
# Return the primary-nameserver field of the domain's SOA record.
get_soa() {
dig "$DOMAIN" SOA +short | awk '{print $1}'
}
# Play the configured alert sound.
play_sound() {
$SOUND_CMD
}
# Perform one poll: report errors, detect changes against LAST_SOA,
# and alert when the SOA reaches the expected nameserver.
check_once() {
local current
current=$(get_soa)
if [[ -z "$current" ]]; then
echo "Error fetching SOA record. Network issue or domain unreachable."
return
fi
[[ "$current" == "$LAST_SOA" ]] && return
echo "SOA changed! New SOA: $current"
LAST_SOA="$current"
if [[ "$current" == "$EXPECTED_NS" ]]; then
echo "SOA matches expected NS. Playing sound..."
play_sound
fi
}
# Main Process
# ========
echo "Monitoring SOA for $DOMAIN. Expected NS: $EXPECTED_NS"
LAST_SOA=""
while true; do
check_once
sleep "$CHECK_INTERVAL"
done

41
test_ns_zones.sh Executable file
View File

@@ -0,0 +1,41 @@
#!/bin/zsh
# Version 01.1
# Script to test DNS zone propagation across ns1, ns2, and ns3
# Script Name: test_ns_zones.sh
# Variables
NS1="23.88.113.138"
NS2="116.202.112.180"
NS3="95.216.198.140"
# Check if a domain name argument is provided
if [ -z "$1" ]; then
echo "Usage: $0 <domain.tld>"
exit 1
fi
DOMAIN=$1
# Query SOA, MX and the mail A record of $DOMAIN against one nameserver.
# Variables are quoted now — an unquoted domain with shell metacharacters
# previously underwent globbing/word-splitting.
function test_ns {
local NS=$1
echo "
=== Testing $DOMAIN on $NS ==="
dig @"$NS" "$DOMAIN" SOA +short
echo ""
echo "MX Record:"
dig @"$NS" "$DOMAIN" MX +short
echo ""
echo "A Record for mail.$DOMAIN:"
dig @"$NS" "mail.$DOMAIN" A +short
echo ""
}
# Test each nameserver
test_ns "$NS1"
test_ns "$NS2"
test_ns "$NS3"
# Completion message (printed regardless of individual query results)
echo "DNS zone test completed for $DOMAIN"

43
wrap_embeddings.sh Executable file
View File

@@ -0,0 +1,43 @@
#!/bin/bash
# Script Version: 0.4
# Description: Convert each .txt in content/ to .json with embedding in json/
# Set variables
CONTENT_DIR="./content"
JSON_DIR="./json"
EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
# Check dependencies
if ! python3 -c "import sentence_transformers" 2>/dev/null; then
echo "[ERROR] ❌ sentence-transformers not installed. Run: pip3 install sentence-transformers"
exit 1
fi
# Check input files
mkdir -p "$JSON_DIR"
if [ ! -d "$CONTENT_DIR" ] || ! ls "$CONTENT_DIR"/*.txt >/dev/null 2>&1; then
echo "[ERROR] ❌ No .txt files found in $CONTENT_DIR"
exit 1
fi
# Generate embeddings. The previous trailing "2>&1 | while read ..." pipe was
# a pass-through no-op that also hid python's exit status; the success message
# now prints only when the python step actually succeeded.
python3 -c "
import sys, json, os
from sentence_transformers import SentenceTransformer
content_dir, json_dir = sys.argv[1], sys.argv[2]
model = SentenceTransformer('${EMBEDDING_MODEL}')
for txt_file in os.listdir(content_dir):
    if txt_file.endswith('.txt'):
        base_name = txt_file[:-4]
        try:
            with open(os.path.join(content_dir, txt_file), 'r', encoding='utf-8') as f:
                text = f.read()
            embedding = model.encode([text])[0].tolist()
            with open(os.path.join(json_dir, f'{base_name}.json'), 'w') as f:
                json.dump({'id': base_name, 'text': text, 'embedding': embedding}, f)
            print(f'[DEBUG] ✅ Saved: {json_dir}/{base_name}.json')
        except Exception as e:
            print(f'[ERROR] ❌ Failed: {txt_file} - {str(e)}', file=sys.stderr)
" "$CONTENT_DIR" "$JSON_DIR"
if [ $? -ne 0 ]; then
echo "[ERROR] ❌ Embedding generation failed"
exit 1
fi
echo "✅ All .txt files converted to JSON with embeddings in $JSON_DIR"