Update 2025-04-13_15:16:39
This commit is contained in:
42
import_embeddings.py
Normal file
42
import_embeddings.py
Normal file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
# Script Version: 0.6
|
||||
# Description: Import existing embeddings.json into Open WebUI's ChromaDB instance using the new client API
|
||||
|
||||
import os
|
||||
import json
|
||||
from chromadb import PersistentClient
|
||||
|
||||
# Use Open WebUI's active Chroma DB directory
CHROMA_DIR = "/srv/open-webui/backend/data/vector_db"
COLLECTION_NAME = "cds_docs"
EMBEDDING_FILE = "embeddings.json"
CONTENT_DIR = "content"

# Stop Open WebUI before running this script to avoid file lock issues
client = PersistentClient(path=CHROMA_DIR)
collection = client.get_or_create_collection(name=COLLECTION_NAME)

# Load existing embeddings. The loop below consumes this as a JSON object
# mapping each content filename to its embedding vector.
# JSON is UTF-8 by specification, so do not rely on the locale's default
# encoding (the content files below are read as UTF-8 as well).
with open(EMBEDDING_FILE, "r", encoding="utf-8") as f:
    embeddings_data = json.load(f)

imported_count = 0

# Ingest each document
for filename, vector in embeddings_data.items():
    filepath = os.path.join(CONTENT_DIR, filename)

    # Keep the try body limited to the file read: a FileNotFoundError raised
    # from inside the Chroma call must not be misreported as a missing
    # content file and silently skipped.
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read().strip()
    except FileNotFoundError:
        print(f"[WARN] Skipping missing file: {filepath}")
        continue

    # The filename doubles as the record id and as its only metadata field;
    # the stored embedding is the precomputed vector from EMBEDDING_FILE.
    collection.add(
        documents=[text],
        metadatas=[{"filename": filename}],
        ids=[filename],
        embeddings=[vector],
    )
    imported_count += 1

print(f"✅ Embeddings successfully imported into Chroma: {imported_count} documents")
|
||||
|
Reference in New Issue
Block a user