Spaces: Running on Zero

jedick committed · 503a0b6 · Parent(s): 17ad0bb

Improve args for thinking mode
Files changed:
- app.py +18 -2
- graph.py +11 -22
- main.py +2 -4
- mods/tool_calling_llm.py +1 -3
- prompts.py +56 -48
app.py
CHANGED
@@ -87,7 +87,9 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     )
     # Get the chat model and build the graph
     chat_model = GetChatModel(compute_mode)
-    graph_builder = BuildGraph(…
+    graph_builder = BuildGraph(
+        chat_model, compute_mode, search_type, think_query=True
+    )
     # Compile the graph with an in-memory checkpointer
     memory = MemorySaver()
     graph = graph_builder.compile(checkpointer=memory)
@@ -398,7 +400,7 @@ with gr.Blocks(
         end = None
     info_text = f"""
     **Database:** {len(sources)} emails from {start} to {end}.
-    **Features:** RAG, today's date, hybrid search (dense+sparse), thinking…
+    **Features:** RAG, today's date, hybrid search (dense+sparse), thinking output (local),
     multiple retrievals per turn (remote), answer with citations (remote), chat memory.
     **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
     """
@@ -537,6 +539,12 @@ with gr.Blocks(
         generate_thread_id,
         outputs=[thread_id],
         api_name=False,
+    ).then(
+        # Focus textbox by updating the textbox with the current value
+        lambda x: gr.update(value=x),
+        [input],
+        [input],
+        api_name=False,
     )

     input.submit(
@@ -563,6 +571,14 @@ with gr.Blocks(
         api_name=False,
     )

+    chatbot.clear(
+        # Focus textbox when the chatbot is cleared
+        lambda x: gr.update(value=x),
+        [input],
+        [input],
+        api_name=False,
+    )
+
     # ------------
     # Data loading
     # ------------
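Both Gradio additions use the same idiom: chain a follow-up event that writes the textbox's current value back to itself, which (per the comments in the commit) is enough to return focus to the input. A minimal, self-contained sketch of the pattern; the button and textbox names here are illustrative, not from app.py:

import gradio as gr

with gr.Blocks() as demo:
    box = gr.Textbox(label="Ask a question")
    new_chat = gr.Button("New chat")
    # The first event does its real work (a stand-in for generate_thread_id);
    # .then() chains a second event that re-emits the textbox value unchanged,
    # which per the commit's comment refocuses the textbox.
    new_chat.click(lambda: None, api_name=False).then(
        lambda x: gr.update(value=x),  # no-op update: same value in, same value out
        [box],
        [box],
        api_name=False,
    )

demo.launch()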
graph.py
CHANGED
@@ -71,23 +71,17 @@ def normalize_messages(messages):
     return messages


-def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False):
+def ToolifyHF(chat_model, system_message, system_message_suffix=""):
     """
     Get a Hugging Face model ready for bind_tools().
     """

-    ## Add /no_think flag to turn off thinking mode (SmolLM3 and Qwen)
-    # if not think:
-    #     system_message = "/no_think\n" + system_message
-
     # Combine system prompt and tools template
     tool_system_prompt_template = system_message + generic_tools_template

     class HuggingFaceWithTools(ToolCallingLLM, ChatHuggingFace):
-
-        …
-        # Allows adding attributes dynamically
-        extra = "allow"
+        def __init__(self, **kwargs):
+            super().__init__(**kwargs)

     chat_model = HuggingFaceWithTools(
         llm=chat_model.llm,
@@ -96,9 +90,6 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
         system_message_suffix=system_message_suffix,
     )

-    # The "model" attribute is needed for ToolCallingLLM to print the response if it can't be parsed
-    chat_model.model = chat_model.model_id + "_for_tools"
-
     return chat_model


@@ -107,8 +98,7 @@ def BuildGraph(
     compute_mode,
     search_type,
     top_k=6,
-    …
-    think_generate=False,
+    think_query=False,
 ):
     """
     Build conversational RAG graph for email retrieval and answering with citations.
@@ -118,8 +108,7 @@
     compute_mode: remote or local (for retriever)
     search_type: dense, sparse, or hybrid (for retriever)
     top_k: number of documents to retrieve
-    …
-    think_generate: Whether to use thinking mode for generation
+    think_query: Whether to use thinking mode for query

     Based on:
     https://python.langchain.com/docs/how_to/qa_sources
@@ -206,7 +195,7 @@
     if is_local:
         # For local models (ChatHuggingFace with SmolLM, Gemma, or Qwen)
         query_model = ToolifyHF(
-            chat_model, query_prompt(…
+            chat_model, query_prompt(chat_model, think=think_query), ""
         ).bind_tools([retrieve_emails])
         # Don't use answer_with_citations tool because responses with it are sometimes unparseable
         generate_model = chat_model
@@ -227,7 +216,7 @@
         messages = normalize_messages(messages)
         # print_message_summaries(messages, "--- query: after normalization ---")
     else:
-        messages = [SystemMessage(query_prompt(…
+        messages = [SystemMessage(query_prompt(chat_model))] + state["messages"]
     response = query_model.invoke(messages)

     return {"messages": response}
@@ -239,12 +228,12 @@
         # print_message_summaries(messages, "--- generate: before normalization ---")
         messages = normalize_messages(messages)
         # Add the system message here because we're not using tools
-        messages = [
-            SystemMessage(generate_prompt(with_tools=False, think=False))
-        ] + messages
+        messages = [SystemMessage(generate_prompt(chat_model))] + messages
         # print_message_summaries(messages, "--- generate: after normalization ---")
     else:
-        messages = […
+        messages = [
+            SystemMessage(generate_prompt(chat_model, with_tools=True))
+        ] + state["messages"]
     response = generate_model.invoke(messages)

     return {"messages": response}
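One subtlety behind the class-body change: ToolCallingLLM and ChatHuggingFace are Pydantic models, which by default reject assignment of undeclared attributes. The old wrapper opted into extra = "allow" precisely so it could attach the ad-hoc "model" attribute after construction; with that attribute removed, a plain pass-through __init__ is all that remains. An illustrative Pydantic v2 sketch; the class names are made up, not from the repo:

from pydantic import BaseModel, ConfigDict

class Strict(BaseModel):
    llm: str

class Open(BaseModel):
    model_config = ConfigDict(extra="allow")  # v2 counterpart of extra = "allow"
    llm: str

open_model = Open(llm="x")
open_model.model = "x_for_tools"      # OK: extra attributes may be set dynamically

strict_model = Strict(llm="x")
# strict_model.model = "x_for_tools"  # would raise ValueError: no field "model"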
main.py
CHANGED
@@ -200,10 +200,8 @@ def RunChain(
     # Get chat model (LLM)
     chat_model = GetChatModel(compute_mode)

-    #
-    system_prompt = generate_prompt()
-    if hasattr(chat_model, "model_id") and not think:
-        system_prompt = f"/no_think\n{system_prompt}"
+    # Get prompt with /no_think for SmolLM3/Qwen
+    system_prompt = generate_prompt(chat_model)

     # Create a prompt template
     system_template = ChatPromptTemplate.from_messages([SystemMessage(system_prompt)])
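The net effect in RunChain is that the /no_think decision now lives in one place instead of being re-implemented by every caller. Roughly (comments paraphrase the two revisions):

# Before: each caller checked the model itself
# system_prompt = generate_prompt()
# if hasattr(chat_model, "model_id") and not think:
#     system_prompt = f"/no_think\n{system_prompt}"

# After: check_prompt() inside prompts.py handles it for all callers
system_prompt = generate_prompt(chat_model)  # think defaults to False

Note that if RunChain still accepts a think flag, passing it through as generate_prompt(chat_model, think=think) would preserve the old opt-in behavior; as written, the new call always uses the default.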
mods/tool_calling_llm.py
CHANGED
@@ -299,9 +299,7 @@ class ToolCallingLLM(BaseChatModel, ABC):
         )
         if called_tool is None:
             # Issue a warning and return the generated content 20250727 jmd
-            warnings.warn(
-                f"Tool {called_tool} called from {self.model} output not in functions list"
-            )
+            warnings.warn(f"Called tool ({called_tool}) not in functions list")
             return AIMessage(content=response_message.content)

         # Get tool arguments from output
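Worth noting: this branch only runs when called_tool is None, so the old f-string always rendered as "Tool None called from …"; the shorter message also drops self.model, which ToolifyHF (above) no longer sets now that the "_for_tools" attribute is gone.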
prompts.py
CHANGED
@@ -3,65 +3,73 @@ from util import get_sources, get_start_end_months
 import re


-def …
-    """
-    …
-
-
-    """
+def check_prompt(prompt, chat_model, think):
+    """Check for unassigned variables and add /no_think if needed"""
+    # A sanity check that we don't have unassigned variables
+    # (this causes KeyError in parsing by ToolCallingLLM)
+    matches = re.findall(r"\{.*?\}", " ".join(prompt))
+    if matches:
+        raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
+    # Check if we should add /no_think to turn off thinking mode
+    if hasattr(chat_model, "model_id"):
+        model_id = chat_model.model_id
+        if ("SmolLM" in model_id or "Qwen" in model_id) and not think:
+            prompt = "/no_think\n" + prompt
+    return prompt
+
+
+def query_prompt(chat_model, think=False):
+    """Return system prompt for query step"""

     # Get start and end months from database
     start, end = get_start_end_months(get_sources())

-    …
-        f"Today Date: {date.today()}."
-        "You are a helpful …
-        "Write a search query to retrieve emails relevant to the user's question."
-        "Do not answer the user's question and do not ask the user for more information."
-        # gpt-4o-mini thinks last two months aren't available with this: "Emails from {start} to {end} are available for retrieval."
-        f"The emails available for retrieval are from {start} to {end}."
-        "For questions about differences or comparison between X and Y, retrieve emails about X and Y."
-        "For general summaries, use retrieve_emails(search_query='R')."
-        "For specific questions, use retrieve_emails(search_query=<specific topic>)."
-        "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
-        "For questions about months, use 3-letter abbreviations (Jan…
-        "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question."  # Qwen
-        # "You must perform the search yourself. Do not tell the user how to retrieve emails."  # Qwen
-        "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question."  # Qwen
-        # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
-    )
-    …
-    …
-    …
-    if matches:
-        raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
-    return query_prompt
+    prompt = (
+        f"Today Date: {date.today()}. "
+        "You are a helpful assistant designed to get information about R programming from the R-help mailing list archives. "
+        "Write a search query to retrieve emails relevant to the user's question. "
+        "Do not answer the user's question and do not ask the user for more information. "
+        # gpt-4o-mini thinks last two months aren't available with this: "Emails from {start} to {end} are available for retrieval. "
+        f"The emails available for retrieval are from {start} to {end}. "
+        "For questions about differences or comparison between X and Y, retrieve emails about X and Y. "
+        "For general summaries, use retrieve_emails(search_query='R'). "
+        "For specific questions, use retrieve_emails(search_query=<specific topic>). "
+        "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
+        "For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
+        "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. "  # Qwen
+        # "You must perform the search yourself. Do not tell the user how to retrieve emails. "  # Qwen
+        "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. "  # Qwen
+        # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list. "
+    )
+    prompt = check_prompt(prompt, chat_model, think)
+
+    return prompt


-def generate_prompt(…
+def generate_prompt(chat_model, think=False, with_tools=False):
     """Return system prompt for generate step"""
-    …
-        f"Today Date: {date.today()}."
-        "You are a helpful chatbot designed to answer questions about R programming based on the R-help mailing list archives."
-        "Summarize the retrieved emails to answer the user's question or query."
-        "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them."
-        "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
-        "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
-        "Example: For a question about using lm(), take examples of lm() from the retrieved emails to answer the user's question."
-        # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
-        "Summarize the content of the emails rather than copying the headers."  # Qwen
-        "You must include inline citations (email senders and dates) in each part of your response."
-        "Only answer general questions about R if the answer is in the retrieved emails."
-        "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
-    )
+    prompt = (
+        f"Today Date: {date.today()}. "
+        "You are a helpful chatbot designed to answer questions about R programming based on the R-help mailing list archives. "
+        "Summarize the retrieved emails to answer the user's question or query. "
+        "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them. "
+        "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails. "
+        "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails. "
+        "Example: For a question about using lm(), take examples of lm() from the retrieved emails to answer the user's question. "
+        # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages. "
+        "Summarize the content of the emails rather than copying the headers. "  # Qwen
+        "You must include inline citations (email senders and dates) in each part of your response. "
+        "Only answer general questions about R if the answer is in the retrieved emails. "
+        "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails. "
+    )
     if with_tools:
-        …
-        …
-        …
-        …
-        …
-        …
-    return …
+        prompt = (
+            f"{prompt}"
+            "Use answer_with_citations to provide the complete answer and all citations used. "
+        )
+    prompt = check_prompt(prompt, chat_model, think)
+
+    return prompt


 # Prompt template for SmolLM3 with tools