Update 2025-04-13_15:16:39
This commit is contained in:
68
Fine-Tune_LoRA_GPU.py
Normal file
68
Fine-Tune_LoRA_GPU.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import torch
import os

# Load model and tokenizer from HF
model_name = "Qwen/Qwen2.5-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")  # GPU

# Prepare dataset: each .txt file as one example
content_dir = "./content"
texts = []

# sorted() makes the dataset order deterministic across runs/filesystems
for txt_file in sorted(os.listdir(content_dir)):
    if txt_file.endswith(".txt"):
        with open(os.path.join(content_dir, txt_file), "r", encoding="utf-8") as tf:
            # Join all non-blank lines in the file into one text
            text = " ".join(line.strip() for line in tf if line.strip())
        if text:  # skip files that are empty after cleanup
            texts.append(text)

dataset = Dataset.from_dict({"text": texts})
print(f"Dataset size: {len(dataset)}")  # Should be ~300


def tokenize_function(examples):
    """Tokenize a batch of texts and attach causal-LM labels.

    Labels are a copy of input_ids; the Trainer shifts them internally
    for next-token prediction.
    """
    tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized


tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# Configure LoRA: rank-8 adapters on the attention q/v projections
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
)
model = get_peft_model(model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_qwen2_5_1_5b",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    num_train_epochs=5,
    learning_rate=2e-4,
    save_steps=50,
    logging_steps=10,
    fp16=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune
trainer.train()

# Save LoRA adapter weights and tokenizer
model.save_pretrained("./fine_tuned_qwen2_5_1_5b")
tokenizer.save_pretrained("./fine_tuned_qwen2_5_1_5b")
|
50
f2b_status.sh
Executable file
50
f2b_status.sh
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
#!/bin/bash
# =============================================================================
# Script Name: f2b_status.sh
# Version: 1.7
# Description: This script retrieves and displays the status of all Fail2Ban
#              jails, including error handling and logging.
# =============================================================================

# Log file path
LOG_FILE="/var/log/fail2ban-status.log"

# Function to log messages with timestamp
log_message() {
    echo "$(date +"%Y-%m-%d %H:%M:%S") - $1" | tee -a "$LOG_FILE"
}

# Function to retrieve the list of jails
get_jail_list() {
    fail2ban-client status | grep 'Jail list:' | cut -d ":" -f2 | tr -d ',' | xargs
}

# Fail fast with a clear message if fail2ban-client is not installed;
# otherwise the pipeline above silently produces an empty jail list.
if ! command -v fail2ban-client &> /dev/null; then
    log_message "fail2ban-client not found. Is Fail2Ban installed?"
    exit 1
fi

# Retrieve the list of jails
log_message "Retrieving the list of Fail2Ban jails..."
JAIL_LIST=$(get_jail_list)

# Check if any jails were found
if [ -z "$JAIL_LIST" ]; then
    log_message "No jails found."
    exit 1
fi

# Convert JAIL_LIST into an array
IFS=' ' read -r -a JAIL_ARRAY <<< "$JAIL_LIST"

# Iterate over each jail and display its status
for JAIL in "${JAIL_ARRAY[@]}"; do
    log_message "Retrieving status for jail: $JAIL"
    STATUS=$(fail2ban-client status "$JAIL" 2>&1)

    if echo "$STATUS" | grep -q "Sorry but the jail"; then
        log_message "Failed to retrieve status for jail: $JAIL. Error: $STATUS"
    else
        log_message "Status for jail $JAIL retrieved successfully."
        echo "Status for jail: $JAIL"
        echo "$STATUS"
        echo "----------------------------"
    fi
done

log_message "Fail2Ban status check completed."
|
208
gitea_push.sh
Executable file
208
gitea_push.sh
Executable file
@ -0,0 +1,208 @@
|
|||||||
|
#!/bin/zsh
# Script Version: 1.5
# Description: Pushes the current folder (e.g. /etc) to a nested Gitea repo using provided nesting arguments. Auto-creates the remote repo via Gitea API if missing.

# Set variables
# ========

# Try to extract GITEA_API_TOKEN from ~/.netrc if present
if [ -z "$GITEA_API_TOKEN" ] && grep -q '^GITEA_API_TOKEN=' ~/.netrc 2>/dev/null; then
    GITEA_API_TOKEN=$(grep '^GITEA_API_TOKEN=' ~/.netrc | head -n1 | cut -d= -f2 | xargs)
    export GITEA_API_TOKEN
fi

GITEA_USER=$(awk '{for(i=1;i<=NF;i++) if($i=="login") print $(i+1)}' ~/.netrc | head -n1)
if [ -z "$GITEA_USER" ]; then
    echo "[ERROR] No login found in ~/.netrc"
    exit 1
fi

GITEA_URL="https://$(awk '{for(i=1;i<=NF;i++) if($i=="machine") print $(i+1)}' ~/.netrc | head -n1)"
if [ -z "$GITEA_URL" ]; then
    echo "[ERROR] No URL found in ~/.netrc"
    exit 1
fi
GITEA_API_URL="$GITEA_URL/api/v1"

PRIVATE=false
DEBUG=false
COMMIT_MESSAGE="Update $(date +%F_%T)"
# Repo-existence flag: 200 once the remote repo is known to exist.
# BUGFIX: previously left unset when GITEA_API_TOKEN was missing (breaking the
# numeric "-eq 200" tests below) and never updated after create_repo, so the
# very first push after auto-creation was always skipped.
HTTP_STATUS=0

# Logging function
# ========
log() {
    local level="$1"; shift
    if [ "$level" = "DEBUG" ] && [ "$DEBUG" != true ]; then return; fi
    local color_reset="$(tput sgr0)"
    local color=""
    case "$level" in
        INFO) color="$(tput setaf 2)" ;;    # green
        WARNING) color="$(tput setaf 3)" ;; # yellow
        ERROR) color="$(tput setaf 1)" ;;   # red
        DEBUG) color="$(tput setaf 4)" ;;   # blue
    esac
    echo "${color}[$level] $*${color_reset}"
}

# Functions
# ========
# Create the remote repository via the Gitea API (POST /user/repos).
create_repo() {
    log INFO "Repository does not exist. Creating via API: $REMOTE_PATH"
    log DEBUG "POST $GITEA_API_URL/user/repos with name=$REMOTE_PATH and private=$PRIVATE"
    RESPONSE=$(curl -s -X POST \
        -H "Authorization: token $GITEA_API_TOKEN" \
        -H "Content-Type: application/json" \
        -d "{\"name\": \"$FOLDER_NAME\", \"private\": $PRIVATE}" \
        "$GITEA_API_URL/user/repos")

    if echo "$RESPONSE" | grep -q '"clone_url"'; then
        log INFO "Remote repository created successfully."
        HTTP_STATUS=200  # repo now exists; allow remote setup and push below
    else
        log ERROR "Failed to create remote repository: $RESPONSE"
        exit 1
    fi
}

# Stage everything and commit (creates the initial commit if none exists).
prepare_commit() {
    git add .
    if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
        log INFO "Creating initial commit"
        git commit -m "$COMMIT_MESSAGE"
    else
        log INFO "Committing changes"
        git commit -m "$COMMIT_MESSAGE" || log INFO "Nothing to commit"
    fi
}

# Point "origin" at $GIT_REMOTE, adding or updating as needed.
setup_remote() {
    if git remote | grep -q '^origin$'; then
        log INFO "Updating remote origin URL"
        git remote set-url origin "$GIT_REMOTE"
    else
        log INFO "Adding remote origin"
        git remote add origin "$GIT_REMOTE"
    fi
}

push_changes() {
    log INFO "Pushing to $GIT_REMOTE"
    git push -u origin main
}

# Show help if no arguments are given
# ========
if [ $# -eq 0 ]; then
    echo "GITEA_API_TOKEN=<your token>"
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host_group>"
    echo "Example: $0 server"
    echo " $0 --private workstation"
    echo " $0 --debug server"
    echo " $0 --message \"minor update\" server"
    echo
    echo "Note: You must cd into the target folder before running this script."
    echo "For example:"
    echo " cd /etc && $0 server"
    echo
    echo "Authentication:"
    echo " Git uses ~/.netrc for authentication. You can create it like this:"
    echo " echo \"machine \$(echo \"$GITEA_URL\" | sed 's|https\\?://||') login $GITEA_USER password \"<password>\"\" > ~/.netrc"
    echo " chmod 600 ~/.netrc"
    exit 0
fi

# Parse arguments
# ========
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --private)
            PRIVATE=true
            shift
            ;;
        --debug)
            DEBUG=true
            shift
            ;;
        --message)
            COMMIT_MESSAGE="$2"
            shift 2
            ;;
        *)
            POSITIONAL_ARGS+=("$1")
            shift
            ;;
    esac
done

set -- "${POSITIONAL_ARGS[@]}"

if [ $# -ne 1 ]; then
    echo "Usage: $0 [--private] [--debug] [--message \"your commit message\"] <host_group>"
    exit 1
fi

HOST_GROUP=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
HOST_NAME=$(hostname -s | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-')
FOLDER_NAME="${HOST_NAME}-$(basename "$PWD")"
REPO_PATH="$PWD"
REMOTE_PATH="$FOLDER_NAME"
GIT_REMOTE="$GITEA_URL/$GITEA_USER/$FOLDER_NAME.git"

# Git authentication hint
log DEBUG "Ensure ~/.netrc has: machine <host> login $GITEA_USER password <personal access token>"

# Check if GITEA_API_TOKEN is set
if [ -z "$GITEA_API_TOKEN" ]; then
    log WARNING "GITEA_API_TOKEN is not set. Skipping API repo creation."
else
    # Check if remote repo exists
    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
        -H "Authorization: token $GITEA_API_TOKEN" \
        "$GITEA_API_URL/repos/$GITEA_USER/$FOLDER_NAME")

    if [ "$HTTP_STATUS" -ne 200 ]; then
        create_repo
    else
        log INFO "Remote repository already exists."
    fi
fi

# Main Process
# ========

# Safety check against pushing from / or $HOME
if [[ "$PWD" == "$HOME" || "$PWD" == "/" ]]; then
    log ERROR "Refusing to run inside \$PWD=$PWD"
    exit 1
fi
log INFO "Pushing $REPO_PATH to $GIT_REMOTE"
cd "$REPO_PATH" || { log ERROR "Directory $REPO_PATH not found"; exit 1; }

# Initialize git if needed
# Branch is fixed to 'main' for simplicity and consistency
if [ ! -d .git ]; then
    log INFO "Initializing Git repo"
    git init
    git config init.defaultBranch main
    git checkout -b main
else
    log DEBUG ".git directory already present"
fi

# Ensure at least one commit exists
prepare_commit

# Set or update remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    setup_remote
else
    log WARNING "Skipping remote setup – repository does not exist."
fi

# Push to remote
if [ "$HTTP_STATUS" -eq 200 ]; then
    push_changes
else
    log WARNING "Skipping push – repository does not exist."
fi
|
||||||
|
|
42
import_embeddings.py
Normal file
42
import_embeddings.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python3
# Script Version: 0.7
# Description: Import existing embeddings.json into Open WebUI's ChromaDB instance using the new client API

import os
import json
from chromadb import PersistentClient

# Use Open WebUI's active Chroma DB directory
CHROMA_DIR = "/srv/open-webui/backend/data/vector_db"
COLLECTION_NAME = "cds_docs"
EMBEDDING_FILE = "embeddings.json"
CONTENT_DIR = "content"

# Stop Open WebUI before running this script to avoid file lock issues
client = PersistentClient(path=CHROMA_DIR)
collection = client.get_or_create_collection(name=COLLECTION_NAME)

# Load existing embeddings (mapping: filename -> embedding vector)
with open(EMBEDDING_FILE, "r") as f:
    embeddings_data = json.load(f)

imported_count = 0

# Ingest each document. The try block covers only the file read, so a real
# ChromaDB error is not silently misreported as a missing file.
for filename, vector in embeddings_data.items():
    filepath = os.path.join(CONTENT_DIR, filename)
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read().strip()
    except FileNotFoundError:
        print(f"[WARN] Skipping missing file: {filepath}")
        continue

    if not text:
        # An empty document carries no signal; don't pollute the collection
        print(f"[WARN] Skipping empty file: {filepath}")
        continue

    collection.add(
        documents=[text],
        metadatas=[{"filename": filename}],
        ids=[filename],
        embeddings=[vector],
    )
    imported_count += 1

print(f"✅ Embeddings successfully imported into Chroma: {imported_count} documents")
|
||||||
|
|
108
lxc_create_container.sh
Executable file
108
lxc_create_container.sh
Executable file
@ -0,0 +1,108 @@
|
|||||||
|
#!/bin/bash
# =============================================================================
# Script Name: lxc_create_container.sh
# Version: 1.2
# Description: This script creates a new LXC container from a template, assigns
#              a unique MAC address, updates the hostname and /etc/hosts file,
#              and verifies internet access.
# =============================================================================

# Prompt for the new container hostname
read -e -p "LXCHOSTNAME: " LXCHOSTNAME
export LXCHOSTNAME

# Refuse an empty hostname early — every later step depends on it
if [ -z "$LXCHOSTNAME" ]; then
    echo "No hostname given."
    exit 1
fi

# Check if the template container is running and stop it if necessary
if lxc-info -n template | grep -q 'RUNNING'; then
    echo "Stopping the template container..."
    if ! lxc-stop -n template; then
        echo "Failed to stop the template container."
        exit 1
    fi
else
    echo "Template container is not running."
fi

# Copy the template to create a new container with the given hostname
echo "Creating a new container with hostname: $LXCHOSTNAME..."
if ! lxc-copy -n template -N "$LXCHOSTNAME"; then
    echo "Failed to copy the template container."
    exit 1
fi

# Function to generate a unique MAC address (LXC OUI 00:16:3e)
generate_unique_hwaddr() {
    local hwaddr
    local existing_hwaddrs
    while : ; do
        hwaddr=$(printf '00:16:3e:%02x:%02x:%02x\n' $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)))
        existing_hwaddrs=$(grep "lxc.net.0.hwaddr" /var/lib/lxc/*/config | grep "$hwaddr")
        if [ -z "$existing_hwaddrs" ]; then
            # MAC address is unique
            echo "$hwaddr"
            return
        fi
    done
}

# Generate a unique MAC address
NEW_HWADDR=$(generate_unique_hwaddr)

# Path to the LXC configuration file
CONFIG_FILE="/var/lib/lxc/$LXCHOSTNAME/config"

# Fail fast if the copy did not produce a config file (sed -i would
# otherwise fail with a confusing error)
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Configuration file $CONFIG_FILE not found."
    exit 1
fi

# Replace the existing hwaddr line
echo "Updating MAC address in $CONFIG_FILE to $NEW_HWADDR..."
if ! sed -i "/^lxc.net.0.hwaddr/c\lxc.net.0.hwaddr = $NEW_HWADDR" "$CONFIG_FILE"; then
    echo "Failed to update MAC address in $CONFIG_FILE."
    exit 1
fi

# Start the new container
echo "Starting the new container..."
if ! lxc-start -n "$LXCHOSTNAME"; then
    echo "Failed to start the container $LXCHOSTNAME."
    exit 1
fi

# Wait for the container to start
sleep 5

# Change the hostname inside the container
echo "Changing the hostname inside the container..."
if ! lxc-attach -n "$LXCHOSTNAME" -- bash -c "echo '$LXCHOSTNAME' > /etc/hostname" || \
   ! lxc-attach -n "$LXCHOSTNAME" -- hostname "$LXCHOSTNAME"; then
    echo "Failed to set the hostname inside the container."
    exit 1
fi

# Update /etc/hosts
echo "Updating /etc/hosts inside the container..."
if ! lxc-attach -n "$LXCHOSTNAME" -- bash -c "echo '127.0.0.1 $LXCHOSTNAME' >> /etc/hosts"; then
    echo "Failed to update /etc/hosts inside the container."
    exit 1
fi

# Ensure the container has internet access (optional — failure is not fatal)
echo "Checking internet connectivity inside the container..."
if ! lxc-attach -n "$LXCHOSTNAME" -- ping -c 4 google.com; then
    echo "Container $LXCHOSTNAME does not have internet access."
fi
echo

# Stop and restart the container
echo "Restarting the container..."
if ! lxc-stop -n "$LXCHOSTNAME" || ! lxc-start -n "$LXCHOSTNAME"; then
    echo "Failed to restart the container $LXCHOSTNAME."
    exit 1
fi

# Display the MAC addresses to verify the changes
echo "Displaying the MAC addresses to verify the changes..."
grep lxc.net.0.hwaddr /var/lib/lxc/*/config

# Wait and list containers to ensure they are running
sleep 9
echo "Listing all containers..."
lxc-ls -f

echo "LXC container setup completed successfully."
|
82
lxc_list_login.sh
Executable file
82
lxc_list_login.sh
Executable file
@ -0,0 +1,82 @@
|
|||||||
|
#!/bin/bash
# =============================================================================
# Script Name: lxc_list_login.sh
# Version: 03
# Description: Lists LXC containers, checks their statuses, and allows login.
# =============================================================================

# Required commands
REQUIRED_CMDS=("lxc-ls" "lxc-info" "lxc-start" "lxc-attach")

# Abort unless every required command is available
for CMD in "${REQUIRED_CMDS[@]}"; do
    command -v "$CMD" &> /dev/null && continue
    echo "The command $CMD is not installed. Please install it and try again."
    exit 1
done

# Helper: report a container's state (RUNNING / STOPPED / ...)
container_state() {
    lxc-info --name="$1" | grep "State" | awk '{print $2}'
}

# List and check LXC containers
echo "List of all LXC containers:"
CONTAINERS=($(lxc-ls -f | awk 'NR>1 && $1 != "" {print $1}'))

# Nothing to do if no containers exist
if [[ ${#CONTAINERS[@]} -eq 0 ]]; then
    echo "There are no LXC containers."
    exit 1
fi

# Render an indexed table of containers and their states
printf "\n%-5s %-20s %-10s\n" "Index" "Container Name" "Status"
echo "------------------------------------------"
IDX=0
for NAME in "${CONTAINERS[@]}"; do
    if [[ -n "$NAME" ]]; then
        printf "%-5d %-20s %-10s\n" "$IDX" "$NAME" "$(container_state "$NAME")"
    fi
    IDX=$(( IDX + 1 ))
done

# Prompt user to select a container
read -p "Select a container to log in (0-$(( ${#CONTAINERS[@]} - 1 ))): " SELECTION

# Guard clause: reject anything that is not an in-range index
if ! [[ $SELECTION =~ ^[0-9]+$ ]] || (( SELECTION < 0 || SELECTION >= ${#CONTAINERS[@]} )); then
    echo "Invalid selection. Please run the script again and choose a valid number."
    exit 1
fi

LXCHOSTNAME="${CONTAINERS[$SELECTION]}"
STATUS=$(container_state "$LXCHOSTNAME")

# Offer to start a stopped container before attaching
if [[ $STATUS == "STOPPED" ]]; then
    read -p "Container $LXCHOSTNAME is stopped. Do you want to start it? (y/n) " START_SELECTION
    if [[ $START_SELECTION != "y" ]]; then
        echo "Container $LXCHOSTNAME was not started."
        exit 1
    fi
    echo "Starting the container $LXCHOSTNAME..."
    if ! lxc-start --name="$LXCHOSTNAME"; then
        echo "Error starting the container $LXCHOSTNAME."
        exit 1
    fi
    echo "Container $LXCHOSTNAME has been started."
    # Poll up to 10 seconds for the container to report RUNNING
    for _ in {1..10}; do
        STATUS=$(container_state "$LXCHOSTNAME")
        [[ $STATUS == "RUNNING" ]] && break
        sleep 1
    done
    if [[ $STATUS != "RUNNING" ]]; then
        echo "Container $LXCHOSTNAME failed to start within the timeout period."
        exit 1
    fi
fi

echo "Logging into the container $LXCHOSTNAME..."
if ! lxc-attach --name="$LXCHOSTNAME"; then
    echo "Error logging into the container $LXCHOSTNAME."
    exit 1
fi
|
11
raw_training_data.py
Normal file
11
raw_training_data.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
import os

content_dir = "./content"
output_file = "raw_training_data.txt"

# Concatenate every content/*.txt into one file, one text per line.
with open(output_file, "w", encoding="utf-8") as f:
    # sorted() keeps the output order stable across filesystems/runs
    for txt_file in sorted(os.listdir(content_dir)):
        if txt_file.endswith(".txt"):
            with open(os.path.join(content_dir, txt_file), "r", encoding="utf-8") as tf:
                text = tf.read().strip()
            # Skip empty files: they would emit blank lines, breaking the
            # "one text per line" contract for downstream consumers
            if text:
                f.write(text + "\n")  # One text per line
|
189
scraper.sh
Executable file
189
scraper.sh
Executable file
@ -0,0 +1,189 @@
|
|||||||
|
#!/bin/bash
# Script Version: 01.9
# Description: Scrapes and extracts page text from MediaWiki pages, cleans image artifacts, and deletes empty results

# Constants
DOWNLOAD_TIMEOUT=10
TEXT_FILE_SUFFIX=".txt"
LINK_FILE_SUFFIX=".txt"

# Function to convert relative URLs to absolute URLs
resolve_url() {
    local base_url=$1
    local relative_url=$2

    if [[ "$relative_url" =~ ^https?:// ]]; then
        echo "$relative_url"
    elif [[ "$relative_url" =~ ^/ ]]; then
        echo "${base_url}${relative_url}"
    else
        echo "${base_url}/${relative_url}"
    fi
}

# Function to check if URL should be skipped
should_skip_url() {
    local url=$1
    case "$url" in
        *"load.php"*|*"IE9fixes.css"*|*"favicon.ico"*|*"opensearch_desc.php"*|*"api.php?action="*|*"Special:RecentChanges"*|*"Special:UserLogin"*|*"Special:RequestAccount"*|*"Dioxipedia:Privacy_policy"*|*"javascript:print();"*|*"mediawiki.org"*)
            return 0 ;; # true, should skip
        *)
            return 1 ;; # false, don't skip
    esac
}

# Function to download content into the SITECACHE global with timeout and error checking
download_content() {
    local url=$1
    local exclude_file=$2

    if should_skip_url "$url"; then
        echo "Skipping known irrelevant URL: $url"
        return 1
    fi

    if [ -f "$exclude_file" ] && grep -Fx "$url" "$exclude_file" > /dev/null; then
        echo "Skipping excluded URL: $url"
        return 1
    fi

    echo "Downloading: $url"
    SITECACHE=$(wget -T "$DOWNLOAD_TIMEOUT" -q -O - "$url" 2>/dev/null)
    if [ $? -ne 0 ] || [ -z "$SITECACHE" ]; then
        echo -e "\033[31m[ ERROR ]:\033[0m Failed to download $url" >&2
        echo "$url" >> "$exclude_file"
        return 1
    fi
    if ! echo "$SITECACHE" | grep -q "<html"; then
        echo "Skipping: $url (not HTML)"
        echo "$url" >> "$exclude_file"
        return 1
    fi
    sleep 1  # be polite to the wiki server
    echo "Successfully downloaded: $url"
    return 0
}

# Improved extraction function using pup and lynx
extract_text() {
    local output_file=$1
    local url=$2
    local exclude_file=$3

    echo "Extracting text from SITECACHE to $output_file"

    EXTRACTED=$(echo "$SITECACHE" | pup '#mw-content-text' 2>/dev/null)

    if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then
        echo "INFO: Content empty with #mw-content-text, trying #bodyContent"
        EXTRACTED=$(echo "$SITECACHE" | pup '#bodyContent' 2>/dev/null)
    fi

    if [ -z "$(echo "$EXTRACTED" | sed '/^\s*$/d')" ]; then
        echo "WARNING: Still no content after both selectors."
        echo "$url" >> "$exclude_file"
        return 1
    fi

    echo "$EXTRACTED" | lynx -stdin -dump -nolist > "$output_file"

    if [ ! -s "$output_file" ]; then
        echo "WARNING: No text extracted from $url after lynx"
        echo "$url" >> "$exclude_file"
        rm -f "$output_file"
        return 1
    fi

    # Remove lines containing image artifacts like [something.jpg] or [something.png]
    sed -i '/\[.*\(jpg\|jpeg\|png\).*]/Id' "$output_file"

    # Delete if file is smaller than 100 bytes (quoted: stat output must
    # survive word splitting even if it is empty)
    if [ "$(stat -c%s "$output_file")" -lt 100 ]; then
        echo "INFO: Deleted $output_file (under 100 bytes)"
        rm -f "$output_file"
        echo "$url" >> "$exclude_file"
        return 1
    fi

    echo "Successfully extracted text to $output_file"
    return 0
}

# Function to extract page title
extract_title() {
    echo "$SITECACHE" | grep -oP '(?<=<title>).*(?=</title>)' | head -n 1 | sed 's/ - dioxipedia$//' | sed 's/[^a-zA-Z0-9-]/_/g' | sed 's/__*/_/g' | sed 's/^_//;s/_$//'
}

# Function to extract links
extract_links() {
    local output_file=$1

    echo "$SITECACHE" | grep -oP '(?<=href=")[^"]+' | grep -v 'translate\.goog' > "$output_file"
    if [ $? -ne 0 ] || [ ! -s "$output_file" ]; then
        echo "WARNING: No links extracted"
        rm -f "$output_file"
        return 1
    fi
    echo "Successfully extracted links to $output_file"
    return 0
}

# Main script logic
if [ $# -ne 1 ]; then
    echo "Usage: $0 <URL>" >&2
    exit 1
fi

INITIAL_URL=$1
DOMAIN=$(echo "$INITIAL_URL" | awk -F[/:] '{print $4}')
BASE_URL="https://$DOMAIN"
ALL_PAGES_URL="$BASE_URL/index.php?title=Special:AllPages"
LINKSFILE="$DOMAIN/links$LINK_FILE_SUFFIX"
EXCLUDE_FILE="$DOMAIN/exclude.txt"
CONTENT_DIR="$DOMAIN/content"

mkdir -p "$DOMAIN"
mkdir -p "$CONTENT_DIR"

# Step 1: Collect links
if ! download_content "$ALL_PAGES_URL" "$EXCLUDE_FILE"; then
    echo "Failed to download $ALL_PAGES_URL"
    exit 1
fi

if ! extract_links "$LINKSFILE"; then
    echo "Failed to extract links"
    exit 1
fi

# Step 2: Process links
# BUGFIX: deduplicate via an associative array instead of substring-matching
# against "${PROCESSED_URLS[*]}" — O(1) per lookup instead of O(n), and no
# false positives when one URL is a substring of another.
declare -A PROCESSED_URLS
while IFS= read -r link; do
    URL=$(resolve_url "$BASE_URL" "$link")

    if [[ -n "${PROCESSED_URLS[$URL]}" ]]; then
        echo "Skipping processed URL: $URL"
        continue
    fi
    PROCESSED_URLS[$URL]=1

    if ! download_content "$URL" "$EXCLUDE_FILE"; then
        continue
    fi

    PAGENAME=$(extract_title)
    [ -z "$PAGENAME" ] && PAGENAME="page"

    TEXTFILE="$CONTENT_DIR/$PAGENAME$TEXT_FILE_SUFFIX"

    if ! extract_text "$TEXTFILE" "$URL" "$EXCLUDE_FILE"; then
        continue
    fi
done < "$LINKSFILE"

echo "Processing complete."
exit 0
|
||||||
|
|
47
sendmail_test.sh
Executable file
47
sendmail_test.sh
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
#!/bin/bash
# Script Name: sendmail_test.sh
# Version: 04
# Description: This script sends a test email using sendmail. The recipient's email address is the first argument.
#              It logs messages to the console only.

# Check if an argument (email address) is provided; default to root
if [ -z "$1" ]; then
    TO="root"
else
    TO="$1"
fi

# Email details
SUBJECT="Postfix Test"
FROM="$(whoami)@$(hostname)"
BODY="This is the email body!"

# Function to send email via sendmail's -t mode (recipients read from headers)
send_email() {
    if ! command -v sendmail &> /dev/null; then
        echo "Sendmail is not installed or configured. Please ensure sendmail is installed and properly set up." >&2
        exit 1
    fi

    sendmail -t <<EOF
To: $TO
Subject: $SUBJECT
From: $FROM

$BODY
EOF
}

# Function to log messages with a timestamp
log_message() {
    MESSAGE=$1
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $MESSAGE"
}

# Send email and log the result
log_message "Starting email send process."
if send_email; then
    log_message "Email sent successfully to $TO."
else
    log_message "Failed to send email to $TO."
    # BUGFIX: propagate the failure via the exit status so cron jobs and
    # calling scripts can detect it (previously the script always exited 0)
    exit 1
fi
|
34
update_openwebui_repo.zsh
Executable file
34
update_openwebui_repo.zsh
Executable file
@ -0,0 +1,34 @@
|
|||||||
|
#!/bin/zsh
# Script Version: 0.6
# Description: Updates the Open WebUI repository and restarts the service.

cd /srv/open-webui || exit 1

echo "[DEBUG] Working directory: $(pwd)"

# Stash uncommitted local changes so the rebase pull cannot fail on them
if [[ -n $(git status --porcelain) ]]; then
    echo "[DEBUG] Änderungen vorhanden – stash wird ausgeführt"
    git stash --all
else
    echo "[DEBUG] Keine lokalen Änderungen"
fi

# Synchronise the repository; abort on failure instead of restarting the
# service on a stale or half-rebased working tree (previously unchecked).
echo "[DEBUG] Repository wird aktualisiert"
if ! git pull --rebase; then
    echo "[ERROR] git pull --rebase fehlgeschlagen" >&2
    exit 1
fi

# Restore stashed changes, if any were stashed above
if git stash list | grep -q "WIP on"; then
    echo "[DEBUG] Änderungen werden wiederhergestellt"
    git stash pop
else
    echo "[DEBUG] Keine Änderungen zum Wiederherstellen"
fi

# Restart the service and verify it actually restarted: the success message
# used to be printed unconditionally, even when systemctl failed.
if ! systemctl restart openwebui.service; then
    echo "[ERROR] Neustart von openwebui.service fehlgeschlagen" >&2
    exit 1
fi
sleep 2
echo "[DEBUG] openwebui.service wurde erfolgreich neu gestartet"
systemctl status openwebui.service --no-pager
|
||||||
|
|
104
update_zone.sh
Executable file
104
update_zone.sh
Executable file
@ -0,0 +1,104 @@
|
|||||||
|
#!/bin/zsh
# Script Version: 10.7
# Description: Dyn DNS update script, checks token, compares IPs, and updates DNS zone if needed.

# Set variables
# ========
# NOTE: tilde is NOT expanded inside double quotes, so the previous
# "~/.dynProxy_token" / "~/.ip.txt" literals pointed at a directory
# literally named "~". Use $HOME explicitly.
TOKEN_FILE="$HOME/.dynProxy_token"
IP_FILE="$HOME/.ip.txt"
UPDATE_URL="http://ip.dynproxy.net/update_zone"
LOG_FILE="/var/log/update_zone.log"

# Private temp file for the HTTP response body, removed on every exit path
# (previously a fixed /tmp/curl_output that leaked on the early "exit 1"s
# and was a predictable, shared path).
CURL_BODY="$(mktemp)" || exit 1
trap 'rm -f "$CURL_BODY"' EXIT

# Detect if script is running interactively
if [[ -t 1 ]]; then
    INTERACTIVE=true
else
    INTERACTIVE=false
fi

# Functions
# ========
# Append a timestamped line to the log file.
log() {
    print "$(date '+%Y-%m-%dT%H:%M:%S.%6N'): $1" >> "$LOG_FILE"
}

# Echo a message only when running interactively.
info() {
    if [[ "$INTERACTIVE" = true ]]; then
        echo "$1"
    fi
}

# Output FastCGI headers if applicable
if [ -t 0 ]; then
    echo "Content-Type: text/plain"
    echo ""
fi

# Ensure the token file exists
if [ ! -f "$TOKEN_FILE" ]; then
    log "ERROR: Token file not found."
    exit 1
fi

# Read the token
TOKEN=$(< "$TOKEN_FILE")
if [ -z "$TOKEN" ]; then
    log "ERROR: Token is empty."
    exit 1
fi

# Log the token retrieval
log "INFO: Token retrieved for update."

# Fetch the current public IP from the external service
IP_CURL=$(curl -s http://ip.dynproxy.net)
if [ -z "$IP_CURL" ]; then
    log "ERROR: Failed to fetch current public IP."
    exit 1
fi

# Ensure the IP file exists
if [ ! -f "$IP_FILE" ]; then
    log "INFO: IP file not found. Creating a new one with current IP."
    echo "$IP_CURL" > "$IP_FILE"
    log "INFO: IP file created with initial IP $IP_CURL."
    info "Initial IP file created with IP: $IP_CURL"
    PREVIOUS_IP="" # Set to empty to force update logic
else
    # Read the previous IP from the IP file
    PREVIOUS_IP=$(< "$IP_FILE")
fi

# Compare the current IP with the previous IP
if [ "$IP_CURL" != "$PREVIOUS_IP" ]; then
    log "INFO: IP has changed from $PREVIOUS_IP to $IP_CURL. Proceeding with DNS update."

    # Log the IP to be updated
    log "INFO: Updating DNS for IP $IP_CURL."

    # Post the token and IP to trigger the DNS zone update
    RESPONSE=$(curl -s -o "$CURL_BODY" -w "%{http_code}" -X POST \
        -H "Content-Type: application/x-www-form-urlencoded" \
        --data-urlencode "token=$TOKEN" \
        --data-urlencode "ip=$IP_CURL" \
        "$UPDATE_URL")

    # Log the response and result.
    # NOTE(review): the token is written to the log in plaintext below —
    # consider redacting it if $LOG_FILE is readable by other users.
    if [ "$RESPONSE" -eq 200 ]; then
        log "SUCCESS: DNS zone update triggered successfully for token $TOKEN and IP $IP_CURL."
        info "DNS zone update triggered successfully"
        # Write the new IP to the IP file only after a confirmed update
        echo "$IP_CURL" > "$IP_FILE"
    else
        log "ERROR: Failed to trigger DNS zone update for token $TOKEN and IP $IP_CURL. Response code: $RESPONSE. Response body: $(cat "$CURL_BODY")"
        info "Failed to trigger DNS zone update. HTTP response: $RESPONSE"
        exit 1
    fi
else
    log "INFO: IP has not changed. No update needed."
    info "IP has not changed. No update needed."
fi

# Temporary file cleanup is handled by the EXIT trap above.
|
||||||
|
|
43
wrap_embeddings.sh
Executable file
43
wrap_embeddings.sh
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/bin/bash
# Script Version: 0.4
# Description: Convert each .txt in content/ to .json with embedding in json/

# Set variables
CONTENT_DIR="./content"
JSON_DIR="./json"
EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"

# Check dependencies
if ! python3 -c "import sentence_transformers" 2>/dev/null; then
    echo "[ERROR] ❌ sentence-transformers not installed. Run: pip3 install sentence-transformers"
    exit 1
fi

# Check input files
mkdir -p "$JSON_DIR"
if [ ! -d "$CONTENT_DIR" ] || ! ls "$CONTENT_DIR"/*.txt >/dev/null 2>&1; then
    echo "[ERROR] ❌ No .txt files found in $CONTENT_DIR"
    exit 1
fi

# Generate embeddings.
# The model name is passed as argv[3] instead of being interpolated into the
# Python source, so special characters in the value cannot break the program.
# The quoted 'PYEOF' delimiter prevents any shell expansion in the script.
python3 - "$CONTENT_DIR" "$JSON_DIR" "$EMBEDDING_MODEL" <<'PYEOF'
import sys, json, os
from sentence_transformers import SentenceTransformer

content_dir, json_dir, model_name = sys.argv[1], sys.argv[2], sys.argv[3]
model = SentenceTransformer(model_name)
failures = 0
for txt_file in os.listdir(content_dir):
    if txt_file.endswith('.txt'):
        base_name = txt_file[:-4]
        try:
            with open(os.path.join(content_dir, txt_file), 'r', encoding='utf-8') as f:
                text = f.read()
            embedding = model.encode([text])[0].tolist()
            with open(os.path.join(json_dir, f'{base_name}.json'), 'w') as f:
                json.dump({'id': base_name, 'text': text, 'embedding': embedding}, f)
            print(f'[DEBUG] ✅ Saved: {json_dir}/{base_name}.json')
        except Exception as e:
            failures += 1
            print(f'[ERROR] ❌ Failed: {txt_file} - {str(e)}', file=sys.stderr)
# Propagate per-file failures so the shell wrapper can report them.
sys.exit(1 if failures else 0)
PYEOF
status=$?

# Only report success when the Python step actually succeeded: the previous
# "2>&1 | while read ..." pipeline was a no-op that also discarded python3's
# exit status, so the success banner printed even after failures.
if [ $status -ne 0 ]; then
    echo "[ERROR] ❌ Embedding generation failed for at least one file" >&2
    exit 1
fi

echo "✅ All .txt files converted to JSON with embeddings in $JSON_DIR"
|
Reference in New Issue
Block a user