From a04156520394e1362a43faedb43c1ab28cf580a1 Mon Sep 17 00:00:00 2001
From: oib
Date: Sat, 12 Apr 2025 09:56:11 +0200
Subject: [PATCH] Update 2025-04-12_09:56:11

---
 2mp3cudaRestoration.sh   |  54 +++++++++++
 calculate_bitrate.sh     |  46 +++++++++
 clean_local_bin.sh       |  57 +++++++++++
 convert2opus.sh          |  48 ++++++++++
 dns_health_check.sh      |  25 +++++
 fetch_transcript.sh      |  37 +++++++
 ffmpeg_video_compress.sh |  76 +++++++++++++++
 gitea_push.sh            | 201 +++++++++++++++++++++++++++++++++++++++
 gitea_push_debug.sh      |  46 +++++++++
 normalize.sh             |  63 ++++++++++++
 scraper.sh               | 189 ++++++++++++++++++++++++++++++++++++
 set_nvidia_fan.sh        |  12 +++
 soa_monitor.sh           |  47 +++++++++
 test_ns_zones.sh         |  41 ++++++++
 wrap_embeddings.sh       |  43 +++++++++
 15 files changed, 985 insertions(+)
 create mode 100755 2mp3cudaRestoration.sh
 create mode 100755 calculate_bitrate.sh
 create mode 100755 clean_local_bin.sh
 create mode 100755 convert2opus.sh
 create mode 100755 dns_health_check.sh
 create mode 100755 fetch_transcript.sh
 create mode 100755 ffmpeg_video_compress.sh
 create mode 100755 gitea_push.sh
 create mode 100755 gitea_push_debug.sh
 create mode 100755 normalize.sh
 create mode 100755 scraper.sh
 create mode 100755 set_nvidia_fan.sh
 create mode 100755 soa_monitor.sh
 create mode 100755 test_ns_zones.sh
 create mode 100755 wrap_embeddings.sh

diff --git a/2mp3cudaRestoration.sh b/2mp3cudaRestoration.sh
new file mode 100755
index 0000000..fec28b5
--- /dev/null
+++ b/2mp3cudaRestoration.sh
@@ -0,0 +1,54 @@
#!/bin/bash
# Script Version: 03
# Description: Simple script to convert audio files to MP3 (192 kbps) using CUDA for acceleration.

# Set variables
# ========
INPUT_DIR="$(pwd)"
OUTPUT_DIR="$INPUT_DIR/output"
BITRATE="192k"

# Functions
# ========
# Convert one audio file to MP3; non-zero return on ffmpeg failure.
convert_to_mp3() {
    local INPUT_FILE="$1"
    local OUTPUT_FILE="$2"

    ffmpeg -hwaccel cuda -i "$INPUT_FILE" -c:a libmp3lame -b:a "$BITRATE" "$OUTPUT_FILE" -y || return 1
}

# Main Process
# ========
echo "Starting audio conversion process..."
mkdir -p "$OUTPUT_DIR"

SUCCESS_COUNT=0
TOTAL_COUNT=0

shopt -s nullglob
for FILE in "$INPUT_DIR"/*; do
    if [[ -f "$FILE" ]]; then
        BASENAME=$(basename "$FILE")
        EXTENSION="${BASENAME##*.}"

        # Skip unsupported extensions
        if ! [[ "$EXTENSION" =~ ^(wav|flac|opus|m4a|mp3)$ ]]; then
            echo "Skipping unsupported file: $FILE"
            continue
        fi

        OUTPUT_FILE="$OUTPUT_DIR/${BASENAME%.*}.mp3"

        echo "Converting $FILE to $OUTPUT_FILE"
        if convert_to_mp3 "$FILE" "$OUTPUT_FILE"; then
            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
        fi
        TOTAL_COUNT=$((TOTAL_COUNT + 1))
    fi
done
shopt -u nullglob

echo "Audio conversion process completed.
Success: $SUCCESS_COUNT/$TOTAL_COUNT"

diff --git a/calculate_bitrate.sh b/calculate_bitrate.sh
new file mode 100755
index 0000000..6ca2ff7
--- /dev/null
+++ b/calculate_bitrate.sh
@@ -0,0 +1,46 @@
#!/bin/bash
# Description: Report per-file and average video bitrate (Mbps) for every file in a folder.

# NOTE(review): the usage placeholder was eaten by markup stripping;
# reconstructed as "<video_folder>" -- confirm against the pre-mangled source.
if [ -z "$1" ]; then
    echo "Usage: $0 <video_folder>"
    exit 1
fi

VIDEO_FOLDER="$1"
OUTPUT_FILE="bitrates.txt"
TOTAL_BITRATE=0
VIDEO_COUNT=0

# Clear or create the output file
> "$OUTPUT_FILE"

# Print the bitrate of one video in Mbps (ffprobe reports bits/s; prints 0 when unknown).
get_bitrate() {
    local video_file="$1"
    local bitrate_bps
    bitrate_bps=$(ffprobe -v error -select_streams v:0 -show_entries stream=bit_rate -of default=noprint_wrappers=1:nokey=1 "$video_file" | head -n 1)
    if [[ "$bitrate_bps" =~ ^[0-9]+$ ]]; then
        echo "scale=2; $bitrate_bps / 1000 / 1000" | bc
    else
        echo "0"
    fi
}

# Iterate through each video file in the folder
for video_file in "$VIDEO_FOLDER"/*; do
    if [ -f "$video_file" ]; then
        bitrate=$(get_bitrate "$video_file")
        echo "File: $video_file - Bitrate: ${bitrate} Mbps" | tee -a "$OUTPUT_FILE"
        TOTAL_BITRATE=$(echo "$TOTAL_BITRATE + $bitrate" | bc)
        ((VIDEO_COUNT++))
    fi
done

# Calculate the average bitrate
if [ "$VIDEO_COUNT" -gt 0 ]; then
    AVERAGE_BITRATE=$(echo "scale=2; $TOTAL_BITRATE / $VIDEO_COUNT" | bc)
    echo "Average Bitrate: $AVERAGE_BITRATE Mbps" | tee -a "$OUTPUT_FILE"
else
    echo "No video files found in the specified folder." | tee -a "$OUTPUT_FILE"
fi

diff --git a/clean_local_bin.sh b/clean_local_bin.sh
new file mode 100755
index 0000000..0639a38
--- /dev/null
+++ b/clean_local_bin.sh
@@ -0,0 +1,57 @@
#!/bin/bash
# Script Version: 0.1
# Description: Interactive cleanup for stale Python tool CLI files in ~/.local/bin (at2 workstation)

# Set variables
# ========
TARGET_DIR="$HOME/.local/bin"
LOG_FILE="$HOME/.clean_local_bin.log"

# Functions
# ========

# List suspected legacy tools
list_legacy_tools() {
    find "$TARGET_DIR" -maxdepth 1 -type f \( \
        -name 'futurize' -o -name 'pasteurize' -o -name 'pkginfo' -o -name 'pybabel' \
        -o -name 'pygmentize' -o -name 'webassets' -o -name 'wheel' \
        -o -name 'mutagen-*' -o -name 'mid3*' -o -name 'moggsplit' \
        -o -name 'filetype' -o -name 'normalizer' -o -name 'markdown*' \
        -o -name 'jsonschema' -o -name 'httpx' -o -name 'openai' \
        -o -name 'unidecode' -o -name 'netaddr' -o -name 'flask' \
        -o -name 'pyserial-*' -o -name 'psg*' \
        \)
}

# Main Process
# ========
echo "[DEBUG] Scanning $TARGET_DIR for workshop leftovers..."

list_legacy_tools > /tmp/.local_bin_candidates.txt

if [[ ! -s /tmp/.local_bin_candidates.txt ]]; then
    echo "[DEBUG] Nothing found to delete."
    exit 0
fi

echo "[DEBUG] Found the following candidates:"
cat /tmp/.local_bin_candidates.txt

echo "[DEBUG] Proceed with deletion? (y/n)"
# FIX: use read -r so a backslash in input is not interpreted
read -r CONFIRM
if [[ "$CONFIRM" != "y" ]]; then
    echo "[DEBUG] Aborted by user"
    exit 1
fi

echo "[DEBUG] Deleting files and logging to $LOG_FILE"
while read -r FILE; do
    echo "[DEBUG] Removing $FILE"
    echo "$(date) [DELETED] $FILE" >> "$LOG_FILE"
    rm -v "$FILE"
done < /tmp/.local_bin_candidates.txt

echo "[DEBUG] Cleanup done."
# EOF

diff --git a/convert2opus.sh b/convert2opus.sh
new file mode 100755
index 0000000..4b041be
--- /dev/null
+++ b/convert2opus.sh
@@ -0,0 +1,48 @@
#!/bin/bash
# Description: Convert all top-level .wav files to .opus in parallel,
# deleting each .wav on successful conversion.

# Number of concurrent ffmpeg processes
N=${1:-4}

# Create a semaphore with N slots on fd 3.
# NOTE(review): the slot-filling loop was destroyed by markup stripping; it is
# reconstructed here as the conventional "write N tokens" pattern -- confirm.
semaphore=$(mktemp)
exec 3<>"$semaphore"
for ((i = 0; i < N; i++)); do
    echo >&3
done

# Timestamped debug logging
debug_log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
}

# Run ffmpeg for one file under semaphore locking; delete the .wav on success.
run_ffmpeg() {
    local file=$1
    local outfile="${file/%.wav/.opus}"

    # Wait for a slot to become available (consume one token from fd 3)
    read -r -u 3
    debug_log "Starting conversion: $file"

    if ffmpeg -i "$file" -y -c:a libopus -vbr on -compression_level 5 -ar 48000 -application audio "$outfile"; then
        debug_log "Completed conversion: $file"
        debug_log "Deleting original file: $file"
        rm "$file"
    else
        debug_log "Failed conversion: $file"
    fi

    # Release the slot
    echo >&3
}

export -f run_ffmpeg
export -f debug_log

# Find all .wav files and convert them in parallel, respecting the semaphore.
find . -maxdepth 1 -type f -name '*.wav' -print0 | xargs -0 -I {} -P "$N" bash -c 'run_ffmpeg "$@"' _ {}

# Cleanup
exec 3>&-
rm -f "$semaphore"

diff --git a/dns_health_check.sh b/dns_health_check.sh
new file mode 100755
index 0000000..05345eb
--- /dev/null
+++ b/dns_health_check.sh
@@ -0,0 +1,25 @@
#!/bin/bash
# Script Version: 02
# Description: Periodically verify consistency between ns1, ns2, and Google nameserver DNS records for a specific domain.
+ +# Set Variables +# ======== +NS1="23.88.113.138" +NS2="116.202.112.180" +GOOGLE_NS="8.8.8.8" +DOMAIN="es1.dynproxy.net" +LOG_FILE="/var/log/dns_health_check.log" + +# Main Process +# ======== +IP_NS1=$(dig @$NS1 $DOMAIN A +short) +IP_NS2=$(dig @$NS2 $DOMAIN A +short) +IP_GOOGLE=$(dig @$GOOGLE_NS $DOMAIN A +short) + +if [ "$IP_NS1" == "$IP_NS2" ] && [ "$IP_NS1" == "$IP_GOOGLE" ]; then + echo "[$(date)] DNS records are consistent across all nameservers: $IP_NS1" >> "$LOG_FILE" +else + echo "[$(date)] DNS inconsistency detected!" >> "$LOG_FILE" + echo "[$(date)] ns1: $IP_NS1, ns2: $IP_NS2, Google: $IP_GOOGLE" >> "$LOG_FILE" +fi + diff --git a/fetch_transcript.sh b/fetch_transcript.sh new file mode 100755 index 0000000..2a6566e --- /dev/null +++ b/fetch_transcript.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Check if URL is provided +if [ -z "$1" ]; then + echo "Usage: $0 " + exit 1 +fi + +URL=$1 +VIDEO_ID=$(echo $URL | grep -o 'v=[^&]*' | cut -d '=' -f 2) +if [ -z "$VIDEO_ID" ]; then + VIDEO_ID=$(echo $URL | grep -o '[^/]*$') +fi +OUTPUT_FILE="${VIDEO_ID}.txt" + +# Create a temporary Node.js script +cat << EOF > fetch_transcript.js +const { YoutubeTranscript } = require('youtube-transcript'); +const fs = require('fs'); + +YoutubeTranscript.fetchTranscript('$VIDEO_ID') + .then(transcript => { + const transcriptText = transcript.map(item => item.text).join('\\n'); + fs.writeFileSync('$OUTPUT_FILE', transcriptText); + console.log('Transcript saved to $OUTPUT_FILE'); + }) + .catch(err => { + console.error('Error fetching transcript:', err); + }); +EOF + +# Run the Node.js script +node fetch_transcript.js + +# Clean up +rm fetch_transcript.js + diff --git a/ffmpeg_video_compress.sh b/ffmpeg_video_compress.sh new file mode 100755 index 0000000..5a4e80c --- /dev/null +++ b/ffmpeg_video_compress.sh @@ -0,0 +1,76 @@ +#!/bin/zsh +# Script Version: 14 +# Description: Compress a video using ffmpeg with NVIDIA CUDA for acceleration, aiming for a smaller file size. 
Push GPU usage and CPU multicore hyperthreading to maximize performance by parallel processing. + +# Set variables +# ======== +INPUT_VIDEO="$1" +OUTPUT_VIDEO="${INPUT_VIDEO%.*}_compressed.mkv" # Output filename based on input with '_compressed' suffix and .mkv extension +TEMP_DIR="/tmp/ffmpeg_chunks" +CHUNK_DURATION=30 # Split video into 30-second chunks for parallel processing +NUM_CHUNKS=4 # Limit to 4 chunks + +# Main Process +# ======== +if [[ -z "$INPUT_VIDEO" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Check if GNU Parallel is installed +if ! command -v parallel &> /dev/null; then + echo "GNU Parallel is required but not installed. Please install it with: apt-get install parallel" + exit 1 +fi + +# Create a temporary directory for storing chunks +mkdir -p "$TEMP_DIR" + +# Split the input video into smaller chunks, ensuring proper timestamps and avoiding timestamp issues +ffmpeg -fflags +genpts -copyts -i "$INPUT_VIDEO" -c copy -map 0 -segment_time "$CHUNK_DURATION" -reset_timestamps 1 -f segment "$TEMP_DIR/chunk_%03d.mkv" + +# Verify if splitting succeeded +if [[ $? -ne 0 ]]; then + echo "Error: Failed to split the video into chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Limit the number of chunks to 4 +CHUNKS=$(ls "$TEMP_DIR"/chunk_*.mkv | head -n "$NUM_CHUNKS") + +# Compress each chunk in parallel using GNU Parallel +echo "$CHUNKS" | parallel -j "$NUM_CHUNKS" ffmpeg -hwaccel cuda -i {} -c:v hevc_nvenc -preset p1 -rc constqp -qp 20 -b:v 5M -maxrate 10M -bufsize 20M -c:a copy {.}_compressed.mkv + +# Verify if compression succeeded +if [[ $? -ne 0 ]]; then + echo "Error: Compression failed for one or more chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Concatenate the compressed chunks into the final output file +ls "$TEMP_DIR"/*_compressed.mkv | sort | xargs -I {} echo "file '{}'" > "$TEMP_DIR/file_list.txt" +ffmpeg -f concat -safe 0 -i "$TEMP_DIR/file_list.txt" -c copy "$OUTPUT_VIDEO" + +# Verify if concatenation succeeded +if [[ $? 
-ne 0 ]]; then + echo "Error: Failed to concatenate the compressed chunks." + rm -rf "$TEMP_DIR" + exit 1 +fi + +# Clean up temporary files +rm -rf "$TEMP_DIR" + +# Output status +if [[ -f "$OUTPUT_VIDEO" ]]; then + echo "Compression complete. Output file: $OUTPUT_VIDEO" +else + echo "Compression failed. Output file was not created." + exit 1 +fi + +# Display file sizes +ls -lh "$INPUT_VIDEO" "$OUTPUT_VIDEO" | awk '{print $9, $5}' + diff --git a/gitea_push.sh b/gitea_push.sh new file mode 100755 index 0000000..2b78169 --- /dev/null +++ b/gitea_push.sh @@ -0,0 +1,201 @@ +#!/bin/zsh +# Script Version: 1.3 +# Description: Pushes the current folder (e.g. /etc) to a nested Gitea repo using provided nesting arguments. Auto-creates the remote repo via Gitea API if missing. + +# Set variables +# ======== + +# Try to extract GITEA_API_TOKEN from ~/.netrc if present +if [ -z "$GITEA_API_TOKEN" ] && grep -q '^GITEA_API_TOKEN=' ~/.netrc 2>/dev/null; then + GITEA_API_TOKEN=$(grep '^GITEA_API_TOKEN=' ~/.netrc | head -n1 | cut -d= -f2 | xargs) + export GITEA_API_TOKEN +fi +GITEA_USER="oib" +GITEA_URL="https://gitea.bubuit.net" +GITEA_API_URL="$GITEA_URL/api/v1" +PRIVATE=false +DEBUG=false +COMMIT_MESSAGE="Update $(date +%F_%T)" + +# Logging function +# ======== +log() { + local level="$1"; shift + if [ "$level" = "DEBUG" ] && [ "$DEBUG" != true ]; then return; fi + local color_reset="$(tput sgr0)" + local color="" + case "$level" in + INFO) color="$(tput setaf 2)" ;; # green + WARNING) color="$(tput setaf 3)" ;; # yellow + ERROR) color="$(tput setaf 1)" ;; # red + DEBUG) color="$(tput setaf 4)" ;; # blue + esac + echo "${color}[$level] $*${color_reset}" +} + +# Functions +# ======== +create_repo() { + log INFO "Repository does not exist. 
Creating via API: $REMOTE_PATH"
    log DEBUG "POST $GITEA_API_URL/user/repos with name=$REMOTE_PATH and private=$PRIVATE"
    RESPONSE=$(curl -s -X POST \
        -H "Authorization: token $GITEA_API_TOKEN" \
        -H "Content-Type: application/json" \
        -d "{\"name\": \"$FOLDER_NAME\", \"private\": $PRIVATE}" \
        "$GITEA_API_URL/user/repos")

    if echo "$RESPONSE" | grep -q '"clone_url"'; then
        log INFO "Remote repository created successfully."
    else
        log ERROR "Failed to create remote repository: $RESPONSE"
        exit 1
    fi
}

# Stage everything and commit (initial commit if HEAD does not exist yet).
prepare_commit() {
    git add .
    if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
        log INFO "Creating initial commit"
        git commit -m "$COMMIT_MESSAGE"
    else
        log INFO "Committing changes"
        git commit -m "$COMMIT_MESSAGE" || log INFO "Nothing to commit"
    fi
}

# Point 'origin' at $GIT_REMOTE, adding or updating as needed.
setup_remote() {
    if git remote | grep -q '^origin$'; then
        log INFO "Updating remote origin URL"
        git remote set-url origin "$GIT_REMOTE"
    else
        log INFO "Adding remote origin"
        git remote add origin "$GIT_REMOTE"
    fi
}

push_changes() {
    log INFO "Pushing to $GIT_REMOTE"
    git push -u origin main
}

# Show help if no arguments are given
# ========
if [ $# -eq 0 ]; then
    # NOTE(review): the "<token>" placeholders below were eaten by markup stripping; reconstructed.
    echo "GITEA_API_TOKEN=<token>"
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host-group>"
    echo "Example: $0 server"
    echo "         $0 --private workstation"
    echo "         $0 --debug server"
    echo "         $0 --message \"minor update\" server"
    echo
    echo "Note: You must cd into the target folder before running this script."
    echo "For example:"
    echo "  cd /etc && $0 server"
    echo
    echo "Authentication:"
    echo "  Git uses ~/.netrc for authentication. You can create it like this:"
    echo "  echo \"machine \$(echo \"$GITEA_URL\" | sed 's|https\\?://||') login $GITEA_USER password <token>\" > ~/.netrc"
    echo "  chmod 600 ~/.netrc"
    exit 0
fi

# Parse arguments
# ========
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --private)
            PRIVATE=true
            shift
            ;;
        --debug)
            DEBUG=true
            shift
            ;;
        --message)
            COMMIT_MESSAGE="$2"
            shift 2
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done

set -- "${POSITIONAL_ARGS[@]}"

if [ $# -ne 1 ]; then
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host-group>"
    exit 1
fi

HOST_GROUP=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
# FIX: the original assigned HOST_NAME twice with the identical command; once is enough.
HOST_NAME=$(hostname -s | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
FOLDER_NAME="${HOST_NAME}-$(basename "$PWD")"
REPO_PATH="$PWD"
REMOTE_PATH="$FOLDER_NAME"
GIT_REMOTE="https://gitea.bubuit.net/$GITEA_USER/$FOLDER_NAME.git"

# Git authentication hint
# export GIT_ASKPASS=true # disabled: does not affect authentication without handler
log DEBUG "Ensure ~/.netrc has:
machine gitea.bubuit.net login $GITEA_USER password <token>"

# FIX: default HTTP_STATUS so the numeric tests below do not error when the
# API token is missing and the curl probe never runs.
HTTP_STATUS=0

# Check if GITEA_API_TOKEN is set
if [ -z "$GITEA_API_TOKEN" ]; then
    log WARNING "GITEA_API_TOKEN is not set. Skipping API repo creation."
else
    # Check if remote repo exists
    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
        -H "Authorization: token $GITEA_API_TOKEN" \
        "$GITEA_API_URL/repos/$GITEA_USER/$FOLDER_NAME")

    if [ "$HTTP_STATUS" -ne 200 ]; then
        create_repo
    else
        log INFO "Remote repository already exists."
    fi
fi

# Main Process
# ========

# Safety check against pushing from / or $HOME
if [[ "$PWD" == "$HOME" || "$PWD" == "/" ]]; then
    log ERROR "Refusing to run inside \$PWD=$PWD"
    exit 1
fi
log INFO "Pushing $REPO_PATH to $GIT_REMOTE"
cd "$REPO_PATH" || { log ERROR "Directory $REPO_PATH not found"; exit 1; }

# Initialize git if needed
# Branch is fixed to 'main' for simplicity and consistency
if [ ! -d .git ]; then
    log INFO "Initializing Git repo"
    git init
    git config init.defaultBranch main
    git checkout -b main
else
    log DEBUG ".git directory already present"
fi

# Ensure at least one commit exists
prepare_commit

# Set or update remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    setup_remote
else
    log WARNING "Skipping remote setup – repository does not exist."
fi

# Push to remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    push_changes
else
    log WARNING "Skipping push – repository does not exist."
fi

diff --git a/gitea_push_debug.sh b/gitea_push_debug.sh
new file mode 100755
index 0000000..12e4f39
--- /dev/null
+++ b/gitea_push_debug.sh
@@ -0,0 +1,46 @@
#!/bin/zsh
# Script Version: 1.0 Debug
# Description: Extended debug version of Gitea push script for diagnosing issues.
# 1) Basic variables
GITEA_USER="oib"
GITEA_URL="https://gitea.bubuit.net"
GITEA_API_URL="$GITEA_URL/api/v1"

# 2) Debug function
log_debug() {
    echo "[DEBUG] $*"
}

log_debug "== Starting gitea_push_debug.sh =="

# 3) Show environment
log_debug "Home Dir: $HOME"
log_debug "PWD: $PWD"
log_debug "User: $USER"

# 4) Check GITEA_API_TOKEN from environment
if [ -z "$GITEA_API_TOKEN" ]; then
    log_debug "GITEA_API_TOKEN is not set in environment"
else
    log_debug "GITEA_API_TOKEN is present, length: ${#GITEA_API_TOKEN}"
fi

# 5) Attempt to read from ~/.gitea_token
if [ -f "$HOME/.gitea_token" ]; then
    TOKEN_FILE_CONTENT=$(cat "$HOME/.gitea_token")
    log_debug "~/.gitea_token found, length: ${#TOKEN_FILE_CONTENT}"
else
    log_debug "~/.gitea_token not found"
fi

# 6) Try an API request to /user with the token from environment
if [ -n "$GITEA_API_TOKEN" ]; then
    USER_RESPONSE=$(curl -s -H "Authorization: token $GITEA_API_TOKEN" "$GITEA_API_URL/user")
    log_debug "Response from /user: $USER_RESPONSE"
else
    log_debug "Skipping /user request; no valid GITEA_API_TOKEN in environment."
fi

log_debug "== End gitea_push_debug.sh =="

diff --git a/normalize.sh b/normalize.sh
new file mode 100755
index 0000000..1068ddb
--- /dev/null
+++ b/normalize.sh
@@ -0,0 +1,63 @@
#!/bin/bash
# Description: Loudness-normalize every top-level .m4a into a .wav, N jobs at a time.

# Check for ffmpeg dependency
if ! command -v ffmpeg &> /dev/null; then
    echo "ffmpeg could not be found. Please install ffmpeg to use this script."
    exit 1
fi

# Number of concurrent ffmpeg processes
N=${1:-8}

# Lock file used by flock on fd 9
semaphore=/tmp/ffmpeg.lock
exec 9>"$semaphore"

# Run one normalization pass under the lock.
# NOTE(review): flock on a single fd serializes the ffmpeg runs; the N-way
# parallelism is actually bounded by the jobs/wait loop below, not by this lock.
run_ffmpeg() {
    flock -x 9

    ffmpeg -y -i "$1" -af "loudnorm=I=-16:LRA=11:measured_I=-20:measured_LRA=16,volume=0.8" "$2"
    if [ $? -ne 0 ]; then
        echo "An error occurred with ffmpeg processing $1"
        # Release the semaphore slot on error as well
        flock -u 9
        return 1
    fi

    flock -u 9
}

# Create the semaphore file if it does not exist
touch "$semaphore"
if [ ! -f "$semaphore" ]; then
    echo "Failed to create semaphore file."
    exit 1
fi

# Process each .m4a file (FIX: the original comment said ".opus" but the glob is .m4a)
find . -maxdepth 1 -type f -name '*.m4a' | while read -r file; do
    wav_file="${file/%.m4a/.wav}"
    if [ ! -f "$wav_file" ]; then
        echo "Processing $file..."
        run_ffmpeg "$file" "$wav_file" &

        # Ensure at most N parallel ffmpeg processes
        while [ "$(jobs -p | wc -l)" -ge "$N" ]; do
            wait -n
        done
    fi
done

# Wait for all background jobs to finish
wait

# Close the semaphore file descriptor and remove the file
exec 9>&-
rm -f "$semaphore"

diff --git a/scraper.sh b/scraper.sh
new file mode 100755
index 0000000..c9bd32e
--- /dev/null
+++ b/scraper.sh
@@ -0,0 +1,189 @@
#!/bin/bash
# Script Version: 01.8
# Description: Scrapes and extracts page text from MediaWiki pages, cleans image artifacts, and deletes empty results

# Constants
DOWNLOAD_TIMEOUT=10
TEXT_FILE_SUFFIX=".txt"
LINK_FILE_SUFFIX=".txt"

# Convert a relative URL to an absolute URL against base_url.
resolve_url() {
    local base_url=$1
    local relative_url=$2

    if [[ "$relative_url" =~ ^https?:// ]]; then
        echo "$relative_url"
    elif [[ "$relative_url" =~ ^/ ]]; then
        echo "${base_url}${relative_url}"
    else
        echo "${base_url}/${relative_url}"
    fi
}

# Return 0 (skip) for known irrelevant MediaWiki URLs.
should_skip_url() {
    local url=$1
    case "$url" in
        *"load.php"*|*"IE9fixes.css"*|*"favicon.ico"*|*"opensearch_desc.php"*|*"api.php?action="*|*"Special:RecentChanges"*|*"Special:UserLogin"*|*"Special:RequestAccount"*|*"Dioxipedia:Privacy_policy"*|*"javascript:print();"*|*"mediawiki.org"*)
            return 0 ;; # true, should skip
        *)
            return 1 ;; # false, don't skip
    esac
}

# Function to download content into a variable
with timeout and error checking +download_content() { + local url=$1 + local exclude_file=$2 + + if should_skip_url "$url"; then + echo "Skipping known irrelevant URL: $url" + return 1 + fi + + if [ -f "$exclude_file" ] && grep -Fx "$url" "$exclude_file" > /dev/null; then + echo "Skipping excluded URL: $url" + return 1 + fi + + echo "Downloading: $url" + SITECACHE=$(wget -T "$DOWNLOAD_TIMEOUT" -q -O - "$url" 2>/dev/null) + if [ $? -ne 0 ] || [ -z "$SITECACHE" ]; then + echo -e "\033[31m[ ERROR ]:\033[0m Failed to download $url" >&2 + echo "$url" >> "$exclude_file" + return 1 + fi + if ! echo "$SITECACHE" | grep -q "> "$exclude_file" + return 1 + fi + sleep 1 + echo "Successfully downloaded: $url" + return 0 +} + +# Improved extraction function using pup and lynx +extract_text() { + local output_file=$1 + local url=$2 + local exclude_file=$3 + + echo "Extracting text from SITECACHE to $output_file" + + EXTRACTED=$(echo "$SITECACHE" | pup '#mw-content-text' 2>/dev/null) + + if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then + echo "INFO: Content empty with #mw-content-text, trying #bodyContent" + EXTRACTED=$(echo "$SITECACHE" | pup '#bodyContent' 2>/dev/null) + fi + + if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then + echo "WARNING: Still no content after both selectors." + echo "$url" >> "$exclude_file" + return 1 + fi + + echo "$EXTRACTED" | lynx -stdin -dump -nolist > "$output_file" + + if [ ! 
-s "$output_file" ]; then + echo "WARNING: No text extracted from $url after lynx" + echo "$url" >> "$exclude_file" + rm -f "$output_file" + return 1 + fi + + # Remove lines containing image artifacts like [something.jpg] or [something.png] + sed -i '/\[.*\(jpg\|jpeg\|png\).*]/Id' "$output_file" + + # Delete if file is smaller than 100 bytes + if [ $(stat -c%s "$output_file") -lt 100 ]; then + echo "INFO: Deleted $output_file (under 100 bytes)" + rm -f "$output_file" + echo "$url" >> "$exclude_file" + return 1 + fi + + echo "Successfully extracted text to $output_file" + return 0 +} + +# Function to extract page title +extract_title() { + echo "$SITECACHE" | grep -oP '(?<=).*(?=)' | head -n 1 | sed 's/ - dioxipedia$//' | sed 's/[^a-zA-Z0-9-]/_/g' | sed 's/__*/_/g' | sed 's/^_//;s/_$//' +} + +# Function to extract links +extract_links() { + local output_file=$1 + + echo "$SITECACHE" | grep -oP '(?<=href=")[^"]+' | grep -v 'translate\.goog' > "$output_file" + if [ $? -ne 0 ] || [ ! -s "$output_file" ]; then + echo "WARNING: No links extracted" + rm -f "$output_file" + return 1 + fi + echo "Successfully extracted links to $output_file" + return 0 +} + +# Main script logic +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +INITIAL_URL=$1 +DOMAIN=$(echo "$INITIAL_URL" | awk -F[/:] '{print $4}') +BASE_URL="https://$DOMAIN" +ALL_PAGES_URL="$BASE_URL/index.php?title=Special:AllPages" +LINKSFILE="$DOMAIN/links$LINK_FILE_SUFFIX" +EXCLUDE_FILE="$DOMAIN/exclude.txt" +CONTENT_DIR="$DOMAIN/content" + +mkdir -p "$DOMAIN" +mkdir -p "$CONTENT_DIR" + +# Step 1: Collect links +if ! download_content "$ALL_PAGES_URL" "$EXCLUDE_FILE"; then + echo "Failed to download $ALL_PAGES_URL" + exit 1 +fi + +if ! 
extract_links "$LINKSFILE"; then + echo "Failed to extract links" + exit 1 +fi + +# Step 2: Process links +PROCESSED_URLS=() +while IFS= read -r link; do + URL=$(resolve_url "$BASE_URL" "$link") + + if [[ " ${PROCESSED_URLS[*]} " =~ " $URL " ]]; then + echo "Skipping processed URL: $URL" + continue + fi + + if ! download_content "$URL" "$EXCLUDE_FILE"; then + PROCESSED_URLS+=("$URL") + continue + fi + + PAGENAME=$(extract_title) + [ -z "$PAGENAME" ] && PAGENAME="page" + + TEXTFILE="$CONTENT_DIR/$PAGENAME$TEXT_FILE_SUFFIX" + + if ! extract_text "$TEXTFILE" "$URL" "$EXCLUDE_FILE"; then + PROCESSED_URLS+=("$URL") + continue + fi + + PROCESSED_URLS+=("$URL") +done < "$LINKSFILE" + +echo "Processing complete." +exit 0 + diff --git a/set_nvidia_fan.sh b/set_nvidia_fan.sh new file mode 100755 index 0000000..bee3d25 --- /dev/null +++ b/set_nvidia_fan.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Script Version: 03 +# Description: Set NVIDIA GPU fan speed on startup. + +# Define the display and X authority paths +export DISPLAY=:1 +export XAUTHORITY=/run/user/1000/gdm/Xauthority + +# Run nvidia-settings commands as root +sudo /usr/bin/nvidia-settings -c :1 -a "[gpu:0]/GPUFanControlState=1" +sudo /usr/bin/nvidia-settings -c :1 -a "[fan:0]/GPUTargetFanSpeed=10" + diff --git a/soa_monitor.sh b/soa_monitor.sh new file mode 100755 index 0000000..0c23f8f --- /dev/null +++ b/soa_monitor.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Script Version: 07 +# Description: Monitor SOA for dynproxy.net and play a sound when it changes to ns1.dynproxy.net. + +# Set variables +# ======== +DOMAIN="dynproxy.net" +EXPECTED_NS="ns1.dynproxy.net." +SOUND_CMD="paplay /usr/share/sounds/freedesktop/stereo/alarm-clock-elapsed.oga" +CHECK_INTERVAL=60 # Interval in seconds + +# Functions +# ======== +get_soa() { + dig "$DOMAIN" SOA +short | awk '{print $1}' +} + +play_sound() { + $SOUND_CMD +} + +# Main Process +# ======== +echo "Monitoring SOA for $DOMAIN. 
Expected NS: $EXPECTED_NS"
LAST_SOA=""

while true; do
    CURRENT_SOA=$(get_soa)

    if [[ -z "$CURRENT_SOA" ]]; then
        echo "Error fetching SOA record. Network issue or domain unreachable."
        sleep "$CHECK_INTERVAL"
        continue
    fi

    if [[ "$CURRENT_SOA" != "$LAST_SOA" ]]; then
        echo "SOA changed! New SOA: $CURRENT_SOA"
        LAST_SOA="$CURRENT_SOA"

        if [[ "$CURRENT_SOA" == "$EXPECTED_NS" ]]; then
            echo "SOA matches expected NS. Playing sound..."
            play_sound
        fi
    fi
    sleep "$CHECK_INTERVAL"
done

diff --git a/test_ns_zones.sh b/test_ns_zones.sh
new file mode 100755
index 0000000..1d61bdf
--- /dev/null
+++ b/test_ns_zones.sh
@@ -0,0 +1,41 @@
#!/bin/zsh
# Version 01.0
# Script to test DNS zone propagation across ns1, ns2, and ns3
# Script Name: test_ns_zones.sh

# Variables
NS1="23.88.113.138"
NS2="116.202.112.180"
NS3="95.216.198.140"

# NOTE(review): usage placeholder lost to markup stripping; presumably "<domain>".
if [ -z "$1" ]; then
    echo "Usage: $0 <domain>"
    exit 1
fi

DOMAIN=$1

# Query SOA, MX, and the mail A record for DOMAIN against one nameserver.
function test_ns {
    local NS=$1
    echo ""
    echo "=== Testing $DOMAIN on $NS ==="
    dig @"$NS" "$DOMAIN" SOA +short
    echo ""
    echo "MX Record:"
    dig @"$NS" "$DOMAIN" MX +short
    echo ""
    echo "A Record for mail.$DOMAIN:"
    dig @"$NS" "mail.$DOMAIN" A +short
    echo ""
}

# Test each nameserver
test_ns "$NS1"
test_ns "$NS2"
test_ns "$NS3"

# Success message
echo "DNS zone test completed for $DOMAIN"

diff --git a/wrap_embeddings.sh b/wrap_embeddings.sh
new file mode 100755
index 0000000..a905c93
--- /dev/null
+++ b/wrap_embeddings.sh
@@ -0,0 +1,43 @@
#!/bin/bash
# Script Version: 0.3
# Description: Convert each .txt in content/ to .json with embedding in json/

# Set variables
CONTENT_DIR="./content"
JSON_DIR="./json"
EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"

# Check dependencies
if ! python3 -c "import sentence_transformers" 2>/dev/null; then
    echo "[ERROR] ❌ sentence-transformers not installed. Run: pip3 install sentence-transformers"
    exit 1
fi

# Check input files
mkdir -p "$JSON_DIR"
if [ ! -d "$CONTENT_DIR" ] || ! ls "$CONTENT_DIR"/*.txt >/dev/null 2>&1; then
    echo "[ERROR] ❌ No .txt files found in $CONTENT_DIR"
    exit 1
fi

# Generate embeddings: one {id, text, embedding} JSON per .txt file
python3 -c "
import sys, json, os
from sentence_transformers import SentenceTransformer
content_dir, json_dir = sys.argv[1], sys.argv[2]
model = SentenceTransformer('${EMBEDDING_MODEL}')
for txt_file in os.listdir(content_dir):
    if txt_file.endswith('.txt'):
        base_name = txt_file[:-4]
        try:
            with open(os.path.join(content_dir, txt_file), 'r', encoding='utf-8') as f:
                text = f.read()
            embedding = model.encode([text])[0].tolist()
            with open(os.path.join(json_dir, f'{base_name}.json'), 'w') as f:
                json.dump({'id': base_name, 'text': text, 'embedding': embedding}, f)
            print(f'[DEBUG] ✅ Saved: {json_dir}/{base_name}.json')
        except Exception as e:
            print(f'[ERROR] ❌ Failed: {txt_file} - {str(e)}', file=sys.stderr)
" "$CONTENT_DIR" "$JSON_DIR" 2>&1 | while read -r line; do echo "$line"; done

echo "✅ All .txt files converted to JSON with embeddings in $JSON_DIR"