from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.retrievers import BM25Retriever
from langchain_text_splitters import RecursiveCharacterTextSplitter

import os

import google.generativeai as genai

# Only needed if the commented-out Ollama variant at the bottom is re-enabled:
# from langchain_community.llms import Ollama
# from langchain_core.prompts import ChatPromptTemplate

# Initialize Gemini; the API key is read from the environment (set GEMINI_API_KEY)
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))



model = genai.GenerativeModel("gemini-2.5-flash")
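
# generate_content also accepts an optional GenerationConfig. A hedged example
# of tightening the output (temperature and max_output_tokens are standard
# google.generativeai parameters; the values here are illustrative):
#
#     model.generate_content(prompt, generation_config=genai.types.GenerationConfig(
#         temperature=0.7, max_output_tokens=512))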



# Load the dialogue corpus and split it into overlapping chunks


loader = DirectoryLoader('.', glob="all_dialogues.txt") 
docs = loader.load()

# 300-character chunks with 100 characters of overlap, so sentences that
# straddle a chunk boundary still appear intact in at least one chunk
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=100
)
texts = text_splitter.split_documents(docs)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
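
# If no FAISS index has been saved yet, it can be built once from the split
# documents and persisted. A minimal sketch; "index.faiss" is the default
# filename save_local/load_local use inside folder_path:
if not os.path.exists("index.faiss"):
    FAISS.from_documents(texts, embeddings).save_local("./")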


# Load the prebuilt FAISS index, reusing the embedding model created above.
# allow_dangerous_deserialization is required because FAISS indexes are pickled;
# only enable it for indexes you created yourself.
db = FAISS.load_local(
    folder_path="./",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)



# Vector Store Retriever
vector_retriever = db.as_retriever(search_kwargs={"k": 10})

# Keyword Retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(texts)
bm25_retriever.k = 5

# Combine both
def ensemble_retriever(query):
    vector_docs = vector_retriever.invoke(query)
    bm25_docs = bm25_retriever.invoke(query)
    combined_docs = vector_docs + bm25_docs
    return combined_docs
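
# The naive concatenation above can return the same chunk twice when both
# retrievers hit it. A hedged drop-in variant that deduplicates on page_content
# while preserving rank order (LangChain's langchain.retrievers.EnsembleRetriever
# is the library's own weighted-fusion alternative):
def ensemble_retriever_deduped(query):
    seen, unique_docs = set(), []
    for doc in vector_retriever.invoke(query) + bm25_retriever.invoke(query):
        if doc.page_content not in seen:
            seen.add(doc.page_content)
            unique_docs.append(doc)
    return unique_docs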


# Used below in respond_rag_huggingface()





def respond_rag_huggingface(message: str):
    """Answer a question with retrieved context via Gemini (the name is kept
    for backward compatibility with existing imports)."""
    docs = ensemble_retriever(message)
    context = "\n\n".join(doc.page_content for doc in docs)

    system_message = os.environ.get(
        "SYSTEM_MESSAGE",
        "You are a Game of Thrones maester and Harry Potter's Dumbledore. "
        "Answer the given question based on your knowledge, providing accurate details "
        "without mentioning any specific sources or context used. "
        "State how much you know about the topic, and do not provide faulty answers. "
        "If the answer is unclear, clarify what you mean rather than saying 'I do not know.'",
    )

    # Ground the answer in the retrieved context
    prompt = f"""{system_message}
Context:
{context}
Question:
{message}
Rules:
- Do not mention the context or where the information comes from
- State how much you know about the topic (e.g., 'I have detailed knowledge,' 'I have some knowledge,' or 'My knowledge is limited')
- Keep answers under 5 sentences
- Include book/season references when possible
- Answer based on relevant knowledge from Game of Thrones and Harry Potter
"""

    response = model.generate_content(prompt)
    return response.text



__all__ = ["respond_rag_huggingface"]
# Alternative local backend (commented out): the same RAG flow served by Ollama.
# def respond_rag_ollama(
#     message: str,
#     system_message: str = (
#         "You are a Game of Thrones maester. Answer the given question strictly "
#         "based on the context provided. If you do not know the answer, reply "
#         "'I don't know'; do not give gibberish answers."
#     ),
#     num_ctx: int = 2048,
#     num_predict: int = 128,
#     temperature: float = 0.8,
#     top_k: int = 40,
#     repeat_penalty: float = 1.1,
#     stop: list[str] | None = None,
# ):
#     # 1. Retrieve relevant context (ensemble_retriever is a plain function)
#     docs = ensemble_retriever(message)
#     context = "\n\n".join(doc.page_content for doc in docs)
#
#     # 2. Build a conversational prompt
#     prompt_template = ChatPromptTemplate.from_messages([
#         ("system", system_message),
#         ("human", """Context: {context}
#
# Question: {question}
#
# Rules:
# - If the answer isn't in the context, respond with "I don't know"
# - Keep answers under 5 sentences
# - Include book/season references when possible"""),
#     ])
#
#     # 3. Configure the Ollama LLM with adjustable parameters
#     llm = Ollama(
#         model="llama3:8b-instruct-q4_0",
#         temperature=temperature,
#         num_ctx=num_ctx,
#         num_predict=num_predict,
#         top_k=top_k,
#         repeat_penalty=repeat_penalty,
#         stop=stop or ["<|eot_id|>"],
#     )
#
#     chain = prompt_template | llm
#     # Ollama is a plain LLM, so the chain returns a string, not a message object
#     return chain.invoke({"context": context, "question": message})
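

# A minimal smoke test, assuming GEMINI_API_KEY is set and all_dialogues.txt
# plus the FAISS index live alongside this script (the question is just
# illustrative):
if __name__ == "__main__":
    print(respond_rag_huggingface("Who holds Winterfell at the end of the books?"))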