"""Gradio front-end for a Game of Thrones Q&A bot backed by a RAG pipeline."""

import gradio as gr

from rag import respond_rag_ollama  # Custom RAG function using Ollama


def respond(message, chat_history, temperature=0.5, max_tokens=512):
    """Answer *message* via the RAG pipeline and append the turn to the chat.

    Args:
        message: The user's question.
        chat_history: List of (user, bot) message tuples (Gradio Chatbot state).
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Token budget, forwarded as Ollama's ``num_predict`` option.

    Returns:
        A pair of (empty string, updated history) — the empty string clears
        the input textbox; the history refreshes the Chatbot component.
    """
    # Forwards the message plus both generation knobs (temperature and
    # num_predict) to the RAG backend.
    response = respond_rag_ollama(
        message, temperature=temperature, num_predict=max_tokens
    )
    chat_history.append((message, response))
    return "", chat_history


with gr.Blocks() as demo:
    gr.Markdown("# Game of Thrones Q&A bot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Tell me something about 7 kingdoms")
    temp = gr.Slider(0.0, 1.0, value=0.5, label="Temperature")
    max_tokens = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
    )
    clear = gr.Button("Clear")

    # Enter in the textbox runs the RAG pipeline; outputs clear the box and
    # refresh the chat display.
    msg.submit(respond, [msg, chatbot, temp, max_tokens], [msg, chatbot])
    # Clear resets both the chat history and the textbox.
    clear.click(lambda: ([], ""), None, [chatbot, msg])

# Guard the launch so importing this module (e.g. for tests) does not start
# the server.
if __name__ == "__main__":
    demo.launch()