from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama

# Load the dialogue corpus from the working directory
loader = DirectoryLoader(".", glob="all_dialogues.txt")
docs = loader.load()

# Split into overlapping chunks so retrieval can return focused passages
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
texts = text_splitter.split_documents(docs)

# Sentence-transformers model used for both indexing and querying
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
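
# Assumption: FAISS.load_local() below expects "index.faiss"/"index.pkl" files
# saved by an earlier run. If they are missing, one way to build them from the
# chunks above (a sketch, not necessarily how the original index was built):
import os

if not os.path.exists("index.faiss"):
    FAISS.from_documents(texts, embeddings).save_local("./")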


# Load the FAISS index saved in the working directory, reusing the embeddings
# object defined above
db = FAISS.load_local(
    folder_path="./",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,  # required for pickle-backed index files
)

from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever

# Vector Store Retriever
vector_retriever = db.as_retriever(search_kwargs={"k": 3})

# Keyword Retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(texts)
bm25_retriever.k = 2

# Combine both
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    weights=[0.6, 0.4]  # Tune based on your tests
)
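
# One quick way to tune the 0.6/0.4 split: run a probe query through each
# retriever and eyeball what comes back. A sketch; the probe string below is
# a placeholder, not from the original.
probe = "replace with a question from your corpus"
for name, retriever in [
    ("vector", vector_retriever),
    ("bm25", bm25_retriever),
    ("ensemble", ensemble_retriever),
]:
    print(name, [d.page_content[:60] for d in retriever.invoke(probe)])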

# Use in ask_question()
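# Hypothetical sketch of that helper (its body is not shown in the original):
# assume it simply gathers ensemble-retrieved context for a question, with
# generation handled by respond_rag_ollama below.
def ask_question(question: str) -> str:
    docs = ensemble_retriever.invoke(question)
    return "\n\n".join(doc.page_content for doc in docs)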


from langchain_core.prompts import ChatPromptTemplate

def respond_rag_ollama(
    message: str,
    history: list[tuple[str, str]],  # accepted for chat-UI compatibility; unused below
    system_message: str,
    num_ctx: int = 2048,
    num_predict: int = 128,
    temperature: float = 0.8,
    top_k: int = 40,
    repeat_penalty: float = 1.1,
    stop: list[str] | None = None,
):
    # 1. Retrieve relevant context via the ensemble retriever
    #    (.invoke replaces the deprecated get_relevant_documents)
    docs = ensemble_retriever.invoke(message)
    context = "\n\n".join(doc.page_content for doc in docs)

    # 2. Build a conversational prompt (a plain string, not an f-string, so
    #    {context} and {question} remain template variables)
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", """Context: {context}

Question: {question}

Rules:
- If the answer isn't in the context, respond with "I don't know"
- Keep answers under 5 sentences
- Include book/season references when possible"""),
    ])

    # 3. Configure the Ollama LLM with the adjustable parameters, honoring the
    #    caller's stop sequences (falls back to Llama 3's end-of-turn token)
    llm = Ollama(
        model="llama3:8b-instruct-q4_0",
        temperature=temperature,
        num_ctx=num_ctx,
        num_predict=num_predict,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=stop or ["<|eot_id|>"],
    )


    # 4. Stream the answer token by token; Runnable chains expose .stream()
    #    (there is no stream_invoke method)
    chain = prompt_template | llm
    yield from chain.stream({"context": context, "question": message})
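
# Example of consuming the stream (an assumed plain-script entry point; the
# question and system message are placeholders, not from the original).
if __name__ == "__main__":
    for token in respond_rag_ollama(
        "Who says this line, and in which season?",
        history=[],
        system_message="You answer questions about the dialogue corpus.",
    ):
        print(token, end="", flush=True)
    print()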