"""Minimal chat demo: a text-generation pipeline behind a Gradio Chatbot UI."""

from transformers import pipeline
import gradio as gr

# Load the model once at startup; the pipeline object is reused for every request.
pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")

# Module-level fallback transcript, kept only for backward compatibility with
# direct chat_fn(user_input) calls. The Gradio UI below passes its own
# per-session list, so concurrent browser sessions no longer share (and
# interleave) a single global conversation.
history = []


def chat_fn(user_input, session_history=None):
    """Generate a bot reply for ``user_input`` and append the exchange to the log.

    Parameters
    ----------
    user_input : str
        The latest user message.
    session_history : list[str] | None
        Per-session transcript lines ("User: ..." / "Bot: ...") that are read
        as context and mutated in place. When omitted, falls back to the
        module-level ``history`` list (the original single-conversation
        behavior).

    Returns
    -------
    str
        The model's reply, trimmed to the first turn it generated.
    """
    log = history if session_history is None else session_history
    log.append(f"User: {user_input}")

    # Concatenate the whole transcript so the model sees the conversation so far.
    context = "\n".join(log) + "\nBot:"
    output = pipe(
        context, max_new_tokens=200, do_sample=True, top_p=0.9
    )[0]["generated_text"]

    # Take the text after the last "Bot:" marker (the one we just appended)...
    bot_reply = output.split("Bot:")[-1].strip()
    # ...and cut at the first hallucinated follow-up turn. Previously the full
    # tail was kept, so a model that continued with "User: ..." leaked fake
    # turns into the displayed reply and corrupted the stored history.
    for marker in ("User:", "Bot:"):
        bot_reply = bot_reply.split(marker)[0].strip()

    log.append(f"Bot: {bot_reply}")
    return bot_reply


with gr.Blocks() as demo:
    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type a message...")
    # Per-session (user, bot) tuples. This was previously created but never
    # used as the prompt source — the global ``history`` was, which mixed
    # every user's conversation together.
    state = gr.State([])

    def respond(user_input, chat_history):
        """Handle one submit: generate a reply and refresh the Chatbot widget."""
        # Rebuild the prompt transcript from this session's state so each
        # browser session is independent of the global fallback list.
        session_log = []
        for user_turn, bot_turn in chat_history:
            session_log.append(f"User: {user_turn}")
            session_log.append(f"Bot: {bot_turn}")
        reply = chat_fn(user_input, session_log)
        chat_history.append((user_input, reply))
        return chat_history, chat_history

    msg.submit(respond, [msg, state], [chatbot_ui, state])

demo.launch()