Spaces:

hash-map
/

Game_of_thrones_q_and_a_bot

Sleeping

App Files Community

hash-map committed on Aug 8

Commit

5fd30d3

verified ·

1 Parent(s): f008f3f

Update rag.py

Browse files

Files changed (1) hide show

rag.py +32 -20

rag.py CHANGED Viewed

@@ -5,6 +5,34 @@ from langchain_community.retrievers import BM25Retriever
 from langchain_community.llms import Ollama
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 loader = DirectoryLoader('.', glob="all_dialogues.txt")
 docs = loader.load()
@@ -47,19 +75,13 @@ def ensemble_retriever(query):
 from langchain_community.llms import HuggingFaceHub
 from langchain_core.prompts import ChatPromptTemplate
-def respond_rag_huggingface(
-    message: str,
-    system_message: str = " you are game of thrones measter answer the given question strictly based on the context provived.if u donot know the answer reply i dont know donot give gibberish answers",
-    num_predict: int = 128,
-    temperature: float = 0.8,
-):
-    # 1. Retrieve context
     docs = ensemble_retriever(message)
     context = "\n\n".join(doc.page_content for doc in docs)
-    # 2. Prompt
     prompt_template = ChatPromptTemplate.from_messages([
-        ("system", system_message),
         ("human", """Context: {context}
         Question: {question}
@@ -70,21 +92,11 @@ def respond_rag_huggingface(
         - Include book/season references when possible""")
     ])
-    # 3. HuggingFace LLM (e.g., use `HuggingFaceH4/zephyr-7b-beta`)
-    llm = HuggingFaceHub(
-        repo_id="mistralai/Mistral-7B-Instruct-v0.1",
-        model_kwargs={
-            "temperature": temperature,
-            "max_new_tokens": num_predict
-        }
-    )
-    # 4. Run chain
     chain = prompt_template | llm
     response = chain.invoke({"context": context, "question": message})
     return response.content
 __all__ = ["respond_rag_huggingface"]
 # def respond_rag_ollama(
 #     message: str,

 from langchain_community.llms import Ollama
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from langchain_community.llms import HuggingFacePipeline
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.retrievers import BM25Retriever
+# Load Zephyr model
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+# Create HF pipeline
+hf_pipeline = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=128,
+    temperature=0.8,
+    pad_token_id=tokenizer.eos_token_id,
+)
+# Wrap in LangChain LLM
+llm = HuggingFacePipeline(pipeline=hf_pipeline)
+# Define your RAG response function
 loader = DirectoryLoader('.', glob="all_dialogues.txt")
 docs = loader.load()
 from langchain_community.llms import HuggingFaceHub
 from langchain_core.prompts import ChatPromptTemplate
+ def respond_rag_huggingface(message: str):
     docs = ensemble_retriever(message)
     context = "\n\n".join(doc.page_content for doc in docs)
     prompt_template = ChatPromptTemplate.from_messages([
+        ("system", "you are game of thrones measter answer the given question strictly based on the context provived.if u donot know the answer reply i dont know donot give gibberish answers"),
         ("human", """Context: {context}
         Question: {question}
         - Include book/season references when possible""")
     ])
     chain = prompt_template | llm
     response = chain.invoke({"context": context, "question": message})
     return response.content
 __all__ = ["respond_rag_huggingface"]
 # def respond_rag_ollama(
 #     message: str,