#!/usr/bin/env python3
# Script Version: 0.6
# Description: Import existing embeddings.json into Open WebUI's ChromaDB
#              instance using the new client API
"""Import precomputed embeddings into a persistent Chroma collection.

Reads ``EMBEDDING_FILE`` (a JSON object; the loop below treats each key as a
file name under ``CONTENT_DIR`` and each value as that file's embedding
vector), reads the matching text file, and stores text, vector and a
``{"filename": ...}`` metadata entry in the ``COLLECTION_NAME`` collection,
using the file name as the record id.

Both ``EMBEDDING_FILE`` and ``CONTENT_DIR`` are relative paths, so they are
resolved against the current working directory.
"""

import os
import json

from chromadb import PersistentClient

# Use Open WebUI's active Chroma DB directory
CHROMA_DIR = "/srv/open-webui/backend/data/vector_db"
COLLECTION_NAME = "cds_docs"
EMBEDDING_FILE = "embeddings.json"
CONTENT_DIR = "content"


def main():
    """Run the import and return the number of documents written.

    Content files that do not exist are skipped with a warning; any other
    error (unreadable JSON, Chroma failures) propagates to the caller.

    Returns:
        int: count of documents stored in the collection during this run.
    """
    # Load the embeddings first so a missing or malformed JSON file aborts
    # the run before the database directory is opened or modified.
    with open(EMBEDDING_FILE, "r", encoding="utf-8") as f:
        embeddings_data = json.load(f)

    # Stop Open WebUI before running this script to avoid file lock issues
    client = PersistentClient(path=CHROMA_DIR)
    collection = client.get_or_create_collection(name=COLLECTION_NAME)

    imported_count = 0

    # Ingest each document
    for filename, vector in embeddings_data.items():
        filepath = os.path.join(CONTENT_DIR, filename)

        # Keep the try body limited to the file read so that only a missing
        # content file is treated as skippable.
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                text = f.read().strip()
        except FileNotFoundError:
            print(f"[WARN] Skipping missing file: {filepath}")
            continue

        # upsert rather than add: ids are file names, so re-running the
        # script against an already-populated collection must overwrite the
        # existing records instead of colliding with them.
        collection.upsert(
            documents=[text],
            metadatas=[{"filename": filename}],
            ids=[filename],
            embeddings=[vector],
        )
        imported_count += 1

    print(f"✅ Embeddings successfully imported into Chroma: {imported_count} documents")
    return imported_count


if __name__ == "__main__":
    main()