from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.retrievers import BM25Retriever
from langchain_text_splitters import RecursiveCharacterTextSplitter
import google.generativeai as genai
import os

# Only needed if the commented-out Ollama variant at the bottom is re-enabled.
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate

# Initialize Gemini (expects GEMINI_API_KEY in the environment or another environment-safe config).
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.5-flash")

# Load and chunk the source dialogues.
loader = DirectoryLoader('.', glob="all_dialogues.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100
)
texts = text_splitter.split_documents(docs)

# Load the prebuilt FAISS index with the same embedding model that built it.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.load_local(
    folder_path="./",
    embeddings=embeddings,
    allow_dangerous_deserialization=True
)

# Vector store retriever (dense, semantic similarity).
vector_retriever = db.as_retriever(search_kwargs={"k": 10})

# Keyword retriever (BM25, lexical matching).
bm25_retriever = BM25Retriever.from_documents(texts)
bm25_retriever.k = 5

# Combine both retrievers: concatenate dense and lexical hits for a query.
def ensemble_retriever(query):
    vector_docs = vector_retriever.invoke(query)
    bm25_docs = bm25_retriever.invoke(query)
    return vector_docs + bm25_docs

# Use in ask_question(). Retrieval uses HuggingFace embeddings; generation uses Gemini.
def respond_rag_huggingface(message: str):
    # Retrieve supporting passages and join them into a single context block.
    docs = ensemble_retriever(message)
    context = "\n\n".join(doc.page_content for doc in docs)

    system_message = os.environ.get(
        "SYSTEM_MESSAGE",
        "You are a Game of Thrones maester and Harry Potter's Dumbledore. "
        "Answer the given question based on your knowledge, providing accurate details "
        "without mentioning any specific sources or context used. "
        "State how much you know about the topic, and do not provide faulty answers. "
        "If the answer is unclear, clarify what you mean rather than saying 'I do not know.'"
    )

    # Ground the answer in the retrieved passages without exposing them to the user.
    prompt = f"""{system_message}

Context:
{context}

Question: {message}

Rules:
- Do not mention the context or where the information comes from
- State how much you know about the topic (e.g., 'I have detailed knowledge,' 'I have some knowledge,' or 'My knowledge is limited')
- Keep answers under 5 sentences
- Include book/season references when possible
- Answer based on relevant knowledge from Game of Thrones and Harry Potter
"""
    response = model.generate_content(prompt)
    return response.text


__all__ = ["respond_rag_huggingface"]
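
# Optional sketch, not part of the pipeline above: because the dense and BM25 result
# lists are simply concatenated, ensemble_retriever can return the same chunk twice.
# A caller that wants unique passages could wrap it like this; the function name and
# the page_content-based key are illustrative choices, not a fixed API.
def deduplicated_ensemble_retriever(query):
    seen = set()
    unique_docs = []
    for doc in ensemble_retriever(query):
        if doc.page_content not in seen:
            seen.add(doc.page_content)
            unique_docs.append(doc)
    return unique_docs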
" + "If the answer is unclear, clarify what you mean rather than saying 'I do not know.'") prompt = f"""{system_message} Question: {message} Rules: - Do not mention the context or where the information comes from - State how much you know about the topic (e.g., 'I have detailed knowledge,' 'I have some knowledge,' or 'My knowledge is limited') - Keep answers under 5 sentences - Include book/season references when possible - Answer based on relevant knowledge from Game of Thrones and Harry Potter """ response = model.generate_content(prompt) return response.text __all__ = ["respond_rag_huggingface"] # def respond_rag_ollama( # message: str, # system_message: str = "you are game of thrones measter answer the given question strictly based on the context provived.if u donot know the answer reply i dont know donot give gibberish answers ", # num_ctx: int = 2048, # num_predict: int = 128, # temperature: float = 0.8, # top_k: int = 40, # repeat_penalty: float = 1.1, # stop: list[str] | None = None, # ): # partial_response="" # # 1. Retrieve relevant context from your vector DB # docs = ensemble_retriever.get_relevant_documents(message) # context = "\n\n".join(doc.page_content for doc in docs) # # 2. Build a conversational prompt # prompt_template = ChatPromptTemplate.from_messages([ # ("system", system_message), # ("human", f"""Context: {{context}} # Question: {{question}} # Rules: # - If the answer isn't in the context, respond with "I don't know" # - Keep answers under 5 sentences # - Include book/season references when possible""") # ]) # # 3. Configure the Ollama LLM with adjustable parameters # llm = Ollama( # model="llama3:8b-instruct-q4_0", # temperature=temperature, # num_ctx=num_ctx, # num_predict=num_predict, # top_k=top_k, # repeat_penalty=repeat_penalty, # stop= ["<|eot_id|>"], # ) # chain = prompt_template | llm # response = chain.invoke({"context": context, "question": message}) # return response.content