Update 2025-04-13_15:16:39

This commit is contained in:
2025-04-13 15:16:39 +02:00
commit 0a1a209dac
12 changed files with 986 additions and 0 deletions

42
import_embeddings.py Normal file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# Script Version: 0.6
# Description: Import existing embeddings.json into Open WebUI's ChromaDB instance using the new client API
import os
import json
from chromadb import PersistentClient
# Use Open WebUI's active Chroma DB directory
CHROMA_DIR = "/srv/open-webui/backend/data/vector_db"
COLLECTION_NAME = "cds_docs"
# JSON file with the precomputed vectors; iterated below as filename -> vector pairs
EMBEDDING_FILE = "embeddings.json"
# Directory holding the source documents named by the keys of EMBEDDING_FILE
CONTENT_DIR = "content"

# Stop Open WebUI before running this script to avoid file lock issues
client = PersistentClient(path=CHROMA_DIR)
collection = client.get_or_create_collection(name=COLLECTION_NAME)

# Load existing embeddings. The encoding is pinned to UTF-8 (the same encoding
# used for the content files) so decoding does not depend on the platform locale.
with open(EMBEDDING_FILE, "r", encoding="utf-8") as f:
    embeddings_data = json.load(f)

imported_count = 0

# Ingest each document
for filename, vector in embeddings_data.items():
    filepath = os.path.join(CONTENT_DIR, filename)

    # Guard only the file read: a FileNotFoundError raised while writing to the
    # collection must not be reported as a missing content file.
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read().strip()
    except FileNotFoundError:
        print(f"[WARN] Skipping missing file: {filepath}")
        continue

    # One record per document; the filename serves as both the record id and
    # the only metadata field.
    collection.add(
        documents=[text],
        metadatas=[{"filename": filename}],
        ids=[filename],
        embeddings=[vector],
    )
    imported_count += 1

print(f"✅ Embeddings successfully imported into Chroma: {imported_count} documents")