#!/usr/bin/env python3
# Script Version: 01
# Description: Generate embeddings from text files using GPU (if available)

import os
import torch
import json
from sentence_transformers import SentenceTransformer

# Set variables
# ========
CONTENT_DIR = "content"
OUTPUT_FILE = "embeddings.json"

# Initialize the embedding model and move to GPU if available
model = SentenceTransformer("all-mpnet-base-v2")
if torch.cuda.is_available():
    model = model.to("cuda")
    print("[INFO] GPU detected: Model running on GPU")
else:
    print("[INFO] No GPU detected: Model running on CPU")

# Generate embeddings
# ========
embedding_data = {}
for filename in os.listdir(CONTENT_DIR):
    if filename.endswith(".txt"):
        filepath = os.path.join(CONTENT_DIR, filename)
        with open(filepath, "r", encoding="utf-8") as file:
            text = file.read().strip()
        embedding = model.encode(text)
        embedding_data[filename] = embedding.tolist()

# Save embeddings to JSON
with open(OUTPUT_FILE, "w") as json_file:
    json.dump(embedding_data, json_file, indent=4)

print(f"[INFO] Embeddings successfully saved to {OUTPUT_FILE}")
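
# Optional sanity check (an addition, not part of the original script): reload the
# JSON written above and confirm each entry round-trips into a numeric vector of
# the expected size. all-mpnet-base-v2 produces 768-dimensional embeddings, so that
# is the shape asserted here; adjust the value if you swap in a different model.
import numpy as np  # available as a dependency of sentence-transformers

with open(OUTPUT_FILE, "r") as json_file:
    saved = json.load(json_file)

for name, vector in saved.items():
    vec = np.asarray(vector, dtype=np.float32)
    assert vec.shape == (768,), f"Unexpected shape for {name}: {vec.shape}"
    print(f"[INFO] {name}: {vec.shape[0]}-dim embedding, norm={np.linalg.norm(vec):.4f}")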