import gradio as gr

from models import stream_generate_response

# Attribution text interpolated at the bottom of the page header.
ANYCODER_LINK = "Built with anycoder"

# Header markup rendered above the chat UI. The leading/trailing blank lines
# are part of the rendered string and are kept exactly as-is.
_HEADER_HTML = f"""

💬 KAT-Dev LLM Chat

Powered by Kwaipilot/KAT-Dev, a large language model. This application uses Hugging Face ZeroGPU for highly efficient inference.

{ANYCODER_LINK}
"""

with gr.Blocks(title="KAT-Dev Chat", theme=gr.themes.Soft()) as demo:
    # Page header; the ChatInterface title below is left empty because the
    # heading lives in this HTML block instead.
    gr.HTML(_HEADER_HTML)

    # Conversation pane: fixed height, bubble layout, copy button enabled.
    conversation_view = gr.Chatbot(
        height=500,
        show_copy_button=True,
        layout="bubble",
    )

    # Borderless prompt input that takes most of the row width.
    prompt_box = gr.Textbox(
        placeholder="Ask the KAT model anything...",
        container=False,
        scale=7,
    )

    # ChatInterface wires the input, the conversation pane and the response
    # function together (presumably streaming, given the handler's name --
    # the handler itself is defined in `models`).
    chat_interface = gr.ChatInterface(
        fn=stream_generate_response,
        title="",
        chatbot=conversation_view,
        textbox=prompt_box,
        # True requests the built-in submit and stop buttons.
        submit_btn=True,
        stop_btn=True,
        # NOTE(review): the original note says GPU concurrency is governed by
        # @spaces.GPU (not visible here); this value is the limit passed to
        # the chat event itself -- confirm the two agree.
        concurrency_limit=10,
    )

# Enable request queuing, then start the server.
demo.queue()
demo.launch()