import gradio as gr
from models import stream_generate_response
# Attribution text interpolated into the page header below.
ANYCODER_LINK = "Built with anycoder"

# Header content rendered above the chat. Defined at module level so the
# triple-quoted text is independent of the indentation of the layout code.
_HEADER_HTML = f"""
💬 KAT-Dev LLM Chat
Powered by Kwaipilot/KAT-Dev, a large language model. This application uses Hugging Face ZeroGPU for highly efficient inference.
{ANYCODER_LINK}
"""

# Page layout: a header followed by the chat interface. `demo` is the
# top-level Blocks app that the rest of the script queues and launches.
with gr.Blocks(title="KAT-Dev Chat", theme=gr.themes.Soft()) as demo:
    gr.HTML(_HEADER_HTML)

    # ChatInterface provides the conversational UI, streaming, and history
    # management; replies come from `stream_generate_response`.
    chat_interface = gr.ChatInterface(
        fn=stream_generate_response,
        title="",  # The title is shown in the HTML header instead.
        chatbot=gr.Chatbot(
            height=500,
            show_copy_button=True,
            layout="bubble",
        ),
        textbox=gr.Textbox(
            placeholder="Ask the KAT model anything...",
            container=False,
            scale=7,
        ),
        # True selects Gradio's built-in submit/stop buttons rather than
        # buttons with a custom text label.
        submit_btn=True,
        stop_btn=True,
        # Cap on chat submissions processed at the same time.
        # NOTE(review): the previous comment said concurrency is handled by
        # @spaces.GPU (presumably applied in `models`) -- confirm that this
        # limit and that decorator are meant to coexist.
        concurrency_limit=10,
    )
# Enable request queuing on the app, then start the web server.
# `queue()` hands back the same Blocks instance, so the calls can be chained.
demo.queue().launch()