Spaces: Running on Zero
import gradio as gr
import spaces
import torch
from transformers import pipeline

# Checkpoint on the Hugging Face Hub that this demo serves.
MODEL_ID = "LLM360/K2-Think"

# Instantiate the text-generation pipeline once at import time so every
# request reuses the same loaded weights.  "auto" lets transformers pick
# the checkpoint's native dtype and place weights on whatever
# accelerator(s) are available.
_PIPELINE_OPTIONS = {"torch_dtype": "auto", "device_map": "auto"}
pipe = pipeline("text-generation", model=MODEL_ID, **_PIPELINE_OPTIONS)
@spaces.GPU  # ZeroGPU Spaces attach a GPU only to functions carrying this decorator
def respond(message, history):
    """Generate one assistant reply and return the updated conversation.

    Args:
        message: The user's new message text from the textbox.
        history: Prior conversation as a list of {"role", "content"}
            dicts (Gradio "messages" format), or None on the first turn.

    Returns:
        A (textbox_value, history) pair: the empty string clears the
        input box, and the history now ends with the assistant reply.
    """
    if history is None:
        history = []
    # Concatenate into a fresh list so Gradio's state is not mutated in place.
    new_history = history + [{"role": "user", "content": message}]
    outputs = pipe(
        new_history,
        max_new_tokens=32768,  # reasoning models emit long traces; allow room
    )
    # The chat pipeline returns the whole conversation; the final message
    # is the newly generated assistant turn.
    response = outputs[0]["generated_text"][-1]["content"]
    new_history.append({"role": "assistant", "content": response})
    return "", new_history
with gr.Blocks(title="K2-Think Chat") as demo:
    gr.Markdown("# K2-Think Chat App")

    # Conversation view in OpenAI-style "messages" format, matching the
    # role/content dicts produced by respond().
    chat_window = gr.Chatbot(type="messages", height=500)
    user_box = gr.Textbox(placeholder="Type your message here...", scale=7)
    reset_button = gr.Button("Clear Chat")

    # Enter submits the message; respond() clears the textbox and
    # returns the extended conversation for the chatbot to render.
    user_box.submit(respond, [user_box, chat_window], [user_box, chat_window])
    # Returning None resets the chatbot component to an empty view.
    reset_button.click(lambda: None, None, chat_window, queue=False)

if __name__ == "__main__":
    demo.launch()