from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
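# Load the raw dialogue file from the current directory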
loader = DirectoryLoader('.', glob="all_dialogues.txt")
docs = loader.load()
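# Split into ~1000-character chunks with 200 characters of overlap so each
# passage stays self-contained for retrieval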
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
texts = text_splitter.split_documents(docs)
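# Embed chunks with a small sentence-transformers model and load the FAISS index
# previously saved to ./ (by default index.faiss / index.pkl); `texts` is only
# needed below for the BM25 keyword retriever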
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.load_local(
    folder_path="./",
    embeddings=embeddings,
    allow_dangerous_deserialization=True
)
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
# Vector Store Retriever
vector_retriever = db.as_retriever(search_kwargs={"k": 3})
# Keyword Retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(texts)
bm25_retriever.k = 2
# Combine both
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    weights=[0.6, 0.4]  # Tune based on your tests
)
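# Optional sanity check (illustrative only; the query string is a placeholder):
# for doc in ensemble_retriever.get_relevant_documents("your test question"):
#     print(doc.page_content[:200])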
# Use in ask_question()
from langchain_core.prompts import ChatPromptTemplate
def respond_rag_ollama(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    num_ctx: int = 2048,
    num_predict: int = 128,
    temperature: float = 0.8,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    stop: list[str] | None = None,
):
    # 1. Retrieve relevant context with the hybrid (vector + BM25) retriever
    docs = ensemble_retriever.get_relevant_documents(message)
    context = "\n\n".join(doc.page_content for doc in docs)
    # 2. Build a conversational prompt
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", """Context: {context}
Question: {question}
Rules:
- If the answer isn't in the context, respond with "I don't know"
- Keep answers under 5 sentences
- Include book/season references when possible"""),
    ])
    # 3. Configure the Ollama LLM with adjustable parameters
    llm = Ollama(
        model="llama3:8b-instruct-q4_0",
        temperature=temperature,
        num_ctx=num_ctx,
        num_predict=num_predict,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=stop or ["<|eot_id|>"],
    )
    # 4. Stream the answer chunk by chunk
    chain = prompt_template | llm
    yield from chain.stream({"context": context, "question": message})
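# Minimal usage sketch, not part of the original pipeline: assumes a local Ollama
# server with llama3:8b-instruct-q4_0 pulled; the question and system prompt below
# are placeholders
if __name__ == "__main__":
    for chunk in respond_rag_ollama(
        message="Replace with your own test question",
        history=[],
        system_message="Answer using only the provided context.",
    ):
        print(chunk, end="", flush=True)
    print()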