Update 2025-04-13_15:16:39

2025-04-13 15:16:39 +02:00
commit 0a1a209dac
12 changed files with 986 additions and 0 deletions
--- a/import_embeddings.py
+++ b/import_embeddings.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# Script Version: 0.6
+# Description: Import existing embeddings.json into Open WebUI's ChromaDB instance using the new client API
+
+import os
+import json
+from chromadb import PersistentClient
+
+# Use Open WebUI's active Chroma DB directory
+CHROMA_DIR = "/srv/open-webui/backend/data/vector_db"
+# Collection that receives the imported documents
+COLLECTION_NAME = "cds_docs"
+# JSON object mapping each content filename to its precomputed embedding vector
+EMBEDDING_FILE = "embeddings.json"
+# Directory containing the text files named by the keys of EMBEDDING_FILE
+CONTENT_DIR = "content"
+
+# Stop Open WebUI before running this script to avoid file lock issues
+client = PersistentClient(path=CHROMA_DIR)
+collection = client.get_or_create_collection(name=COLLECTION_NAME)
+
+# Load existing embeddings. The encoding is pinned to UTF-8 (the JSON
+# interchange encoding) so that decoding does not depend on the platform
+# locale, matching how the content files are read below.
+with open(EMBEDDING_FILE, "r", encoding="utf-8") as f:
+    embeddings_data = json.load(f)
+
+imported_count = 0
+
+# Ingest each document
+for filename, vector in embeddings_data.items():
+    filepath = os.path.join(CONTENT_DIR, filename)
+
+    # Guard only the file read: a FileNotFoundError raised by the database
+    # layer must not be misreported as a missing content file and skipped.
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            text = f.read().strip()
+    except FileNotFoundError:
+        print(f"[WARN] Skipping missing file: {filepath}")
+        continue
+
+    # upsert() instead of add(): the collection may already exist (it is
+    # obtained via get_or_create_collection), and the filename is used as the
+    # record ID. Depending on the Chroma version, add() either rejects or
+    # silently ignores an ID that is already present, so a re-run would never
+    # refresh stale records even though the counter claims they were imported.
+    collection.upsert(
+        documents=[text],
+        metadatas=[{"filename": filename}],
+        ids=[filename],
+        embeddings=[vector],
+    )
+    imported_count += 1
+
+print(f"✅ Embeddings successfully imported into Chroma: {imported_count} documents")
+