Spaces:
Sleeping
Sleeping
| import os | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_chroma import Chroma | |
def get_embeddings(model="text-embedding-3-large"):
    """Build the OpenAI embeddings client used for indexing and querying.

    Args:
        model: Name of the OpenAI embedding model to use. Defaults to
            "text-embedding-3-large", the value that was previously
            hard-coded, so existing zero-argument calls are unaffected.

    Returns:
        An ``OpenAIEmbeddings`` instance configured with ``model``.
    """
    # NOTE(review): credentials are not passed here; presumably the client
    # picks them up from its own configuration -- confirm for deployment.
    return OpenAIEmbeddings(model=model)
def load_or_create_vectorstore(docs, embeddings, path, *, chunk_size=500, chunk_overlap=0):
    """Load the Chroma vector store at ``path``, or build it from ``docs``.

    If ``path`` is a directory that already contains entries, it is opened
    as an existing store and ``docs`` is not used. Otherwise ``docs`` is
    split into chunks and a new store is created with ``path`` as its
    persist directory.

    Args:
        docs: Documents to index when no store exists yet; passed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        embeddings: Embedding function handed to Chroma for both the load
            and the create path.
        path: Persist directory of the vector store.
        chunk_size: Chunk size given to the text splitter (default 500,
            the previously hard-coded value).
        chunk_overlap: Chunk overlap given to the text splitter (default 0,
            the previously hard-coded value).

    Returns:
        The loaded or newly created ``Chroma`` vector store.
    """
    # A bare existence check would treat an empty, pre-created directory as
    # a populated store and return an empty index without ever embedding
    # ``docs``. Require a directory that actually has content.
    store_exists = os.path.isdir(path) and bool(os.listdir(path))
    if store_exists:
        print("Loading existing Chroma vector store from disk...")
        return Chroma(persist_directory=path, embedding_function=embeddings)

    # No usable store on disk: split the documents before embedding them.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    all_splits = text_splitter.split_documents(docs)
    print(f"Documents are split into {len(all_splits)} chunks from {len(docs)} documents.")

    # Create the new store with ``path`` as its persist directory.
    print("Creating new Chroma vector store...")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        persist_directory=path,
    )
    print(f"Vectorstore created and saved to {path}")
    return vectorstore