#!/bin/bash
# Script Version: 0.3
# Description: Convert each .txt file in content/ into a .json file in json/.
#   Each JSON document holds the file's base name ('id'), its full text
#   ('text') and a sentence-transformers embedding of that text ('embedding').
# Usage: run from the directory that contains content/; takes no arguments.
# Exit status: 0 when every file was converted; 1 when a dependency is
#   missing, no input exists, or at least one file failed to convert.

set -euo pipefail

# Set variables
readonly CONTENT_DIR="./content"
readonly JSON_DIR="./json"
readonly EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"

# Check dependencies
# stderr is silenced on purpose: the ImportError traceback is replaced by the
# friendlier message below.
if ! python3 -c "import sentence_transformers" 2>/dev/null; then
  echo "[ERROR] ❌ sentence-transformers not installed. Run: pip3 install sentence-transformers" >&2
  exit 1
fi

# Check input files
mkdir -p -- "$JSON_DIR"

# Let the shell glob instead of parsing `ls`; nullglob makes an unmatched
# pattern expand to an empty array rather than to the literal pattern.
shopt -s nullglob
txt_files=("$CONTENT_DIR"/*.txt)
shopt -u nullglob

if [[ ! -d "$CONTENT_DIR" ]] || ((${#txt_files[@]} == 0)); then
  echo "[ERROR] ❌ No .txt files found in $CONTENT_DIR" >&2
  exit 1
fi

# Generate embeddings
# The Python program is supplied through a quoted here-doc so the shell never
# expands anything inside it, and the model name travels as an argument rather
# than being spliced into the source text. Python is invoked directly (no
# relay pipeline), so its exit status decides whether success is reported.
if ! python3 - "$CONTENT_DIR" "$JSON_DIR" "$EMBEDDING_MODEL" <<'PYTHON'
import json
import os
import sys

from sentence_transformers import SentenceTransformer

content_dir, json_dir, model_name = sys.argv[1], sys.argv[2], sys.argv[3]
model = SentenceTransformer(model_name)

failures = 0
# Sorted so that the processing order and log output are reproducible.
for txt_file in sorted(os.listdir(content_dir)):
    if not txt_file.endswith('.txt'):
        continue
    base_name = txt_file[:-4]
    try:
        with open(os.path.join(content_dir, txt_file), 'r', encoding='utf-8') as f:
            text = f.read()
        # encode() receives a one-element batch; keep the single result vector.
        embedding = model.encode([text])[0].tolist()
        with open(os.path.join(json_dir, f'{base_name}.json'), 'w', encoding='utf-8') as f:
            json.dump({'id': base_name, 'text': text, 'embedding': embedding}, f)
        print(f'[DEBUG] ✅ Saved: {json_dir}/{base_name}.json')
    except Exception as e:
        # Best effort: report the failure and keep going with the other files,
        # but remember it so the exit status reflects the problem.
        failures += 1
        print(f'[ERROR] ❌ Failed: {txt_file} - {str(e)}', file=sys.stderr)

sys.exit(1 if failures else 0)
PYTHON
then
  echo "[ERROR] ❌ Embedding generation failed for one or more files" >&2
  exit 1
fi

echo "✅ All .txt files converted to JSON with embeddings in $JSON_DIR"