"""Gradio front-end for a Game of Thrones Q&A bot backed by a RAG pipeline."""

import gradio as gr

from rag import respond_rag_ollama  # Custom RAG function using Ollama


def respond(message, chat_history, temperature=0.5, max_tokens=512):
    """Answer *message* via the RAG pipeline and append the turn to the chat.

    Args:
        message: The user's question.
        chat_history: List of (user, bot) message tuples (Gradio Chatbot state).
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Token budget, forwarded as Ollama's ``num_predict`` option.

    Returns:
        A pair of (empty string, updated history) — the empty string clears
        the input textbox; the history refreshes the Chatbot component.
    """
    # Forwards the message plus both generation knobs (temperature and
    # num_predict) to the RAG backend.
    response = respond_rag_ollama(
        message, temperature=temperature, num_predict=max_tokens
    )
    chat_history.append((message, response))
    return "", chat_history


with gr.Blocks() as demo:
    gr.Markdown("# Game of Thrones Q&A bot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Tell me something about 7 kingdoms")
    temp = gr.Slider(0.0, 1.0, value=0.5, label="Temperature")
    max_tokens = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
    )
    clear = gr.Button("Clear")

    # Enter in the textbox runs the RAG pipeline; outputs clear the box and
    # refresh the chat display.
    msg.submit(respond, [msg, chatbot, temp, max_tokens], [msg, chatbot])
    # Clear resets both the chat history and the textbox.
    clear.click(lambda: ([], ""), None, [chatbot, msg])

# Guard the launch so importing this module (e.g. for tests) does not start
# the server.
if __name__ == "__main__":
    demo.launch()