"""Gradio chat demo for the Falcon-H1 model family.

Each model checkpoint is served behind an OpenAI-compatible endpoint whose
credentials (API key, base URL) are read from environment variables named in
MODEL_CONFIGS. Responses are streamed token-by-token into a gr.Chatbot.
"""

import os
from datetime import date

import gradio as gr
import openai

# Model configuration dictionary: maps a display name to the served model id
# and the env-var names holding that endpoint's API key / base URL.
MODEL_CONFIGS = {
    "Falcon-H1-34B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-34B-Instruct",
        "api_key_env": "XXL_API_KEY",
        "base_url_env": "XXL_URL",
        "description": "XXL (34B)",
    },
    "Falcon-H1-7B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-7B-Instruct",
        "api_key_env": "L_API_KEY",
        "base_url_env": "L_URL",
        "description": "L (7B)",
    },
    "Falcon-H1-3B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-3B-Instruct",
        "api_key_env": "M_API_KEY",
        "base_url_env": "M_URL",
        "description": "M (3B)",
    },
    "Falcon-H1-1.5B-Deep-Instruct": {
        "model_id": "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
        "api_key_env": "S_API_KEY",
        "base_url_env": "S_URL",
        "description": "S (1.5B Deep)",
    },
    "Falcon-H1-1.5B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "api_key_env": "XS_API_KEY",
        "base_url_env": "XS_URL",
        "description": "XS (1.5B)",
    },
    "Falcon-H1-0.5B-Instruct": {
        "model_id": "tiiuae/Falcon-H1-0.5B-Instruct",
        "api_key_env": "XXS_API_KEY",
        "base_url_env": "XXS_URL",
        "description": "XXS (0.5B)",
    },
}

today = date.today()

# Optional system prompt; may legitimately be unset (None) — handled in
# stream_chat by omitting the system message entirely.
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")

# CSS styling for the Blocks UI.
CSS = """
/* Main style improvements */
.container {
    max-width: 900px !important;
    margin-left: auto !important;
    margin-right: auto !important;
}

/* Title styling */
h1 {
    background: linear-gradient(90deg, #4776E6 0%, #8E54E9 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
}

.subtitle {
    text-align: center;
    color: #666;
    margin-bottom: 1rem;
}

/* Button styling */
.duplicate-button {
    margin: 1rem auto !important;
    display: block !important;
    color: #fff !important;
    background: linear-gradient(90deg, #4776E6 0%, #8E54E9 100%) !important;
    border-radius: 100vh !important;
    padding: 0.5rem 1.5rem !important;
    font-weight: 600 !important;
    border: none !important;
    box-shadow: 0 4px 6px rgba(50, 50, 93, 0.11), 0 1px 3px rgba(0, 0, 0, 0.08) !important;
}

/* Parameter accordion styling */
.accordion {
    border-radius: 8px !important;
    overflow: hidden !important;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1) !important;
    margin-bottom: 1rem !important;
}

h3 {
    text-align: center;
}
"""

TITLE = "<h1>Falcon-H1</h1>"

# BUG FIX: SUB_TITLE was referenced below (SUB_TITLE.format(...)) but never
# defined in the original source, which raised NameError at startup. The
# string literal holding this text was also malformed (opened with `"` and
# closed with `"""`). Reconstructed from the descriptive text that followed
# TITLE; {today_date} is filled in when the UI is built.
SUB_TITLE = (
    "<p class='subtitle'>Falcon-H1 is a new SoTA hybrid model by TII in "
    "Abu Dhabi. It is open source and available on Hugging Face. This demo "
    "is powered by OpenInnovationAI. Try out our chat interface.<br>"
    "Today: {today_date}</p>"
)


def stream_chat(
    message: str,
    history: list,
    model_name: str,
    temperature: float = 0.1,
    max_new_tokens: int = 1024,
    top_p: float = 1.0,
    top_k: int = 20,
    presence_penalty: float = 1.2,
):
    """Stream a chat completion from the selected model.

    Args:
        message: The new user message.
        history: Prior turns as a list of [user, assistant] pairs.
        model_name: Key into MODEL_CONFIGS selecting the endpoint.
        temperature, max_new_tokens, top_p, top_k, presence_penalty:
            Sampling parameters forwarded to the API.

    Yields:
        The accumulated partial response text (or an error string prefixed
        with a cross mark if configuration or the API call fails).

    NOTE(review): top_k is accepted for interface compatibility but the
    OpenAI chat-completions API has no top_k parameter, so it is not
    forwarded; passing it via extra_body would be needed for backends
    (e.g. vLLM) that support it.
    """
    cfg = MODEL_CONFIGS[model_name]
    api_key = os.getenv(cfg["api_key_env"])
    base_url_env = cfg.get("base_url_env")
    base_url = os.getenv(base_url_env) if base_url_env else None

    # Fail fast with a readable message when the endpoint is not configured.
    if not api_key:
        yield f"❌ Env-var `{cfg['api_key_env']}` not set."
        return
    if base_url_env and not base_url:
        yield f"❌ Env-var `{cfg['base_url_env']}` not set."
        return

    client = openai.OpenAI(api_key=api_key, base_url=base_url)

    # Build the message list. BUG FIX: the original always prepended a system
    # message even when SYSTEM_PROMPT was unset, sending content=None — an
    # invalid payload. Include it only when a prompt is actually configured.
    msgs = []
    if SYSTEM_PROMPT:
        msgs.append({"role": "system", "content": SYSTEM_PROMPT})
    for u, a in history:
        msgs += [
            {"role": "user", "content": u},
            {"role": "assistant", "content": a},
        ]
    msgs.append({"role": "user", "content": message})

    try:
        stream = client.chat.completions.create(
            model=cfg["model_id"],
            messages=msgs,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_new_tokens,
            presence_penalty=presence_penalty,
            stream=True,
        )
        partial = ""
        for chunk in stream:
            # Delta content is None for role/None chunks; skip those.
            if (delta := chunk.choices[0].delta).content:
                partial += delta.content
                yield partial
    except Exception as e:
        # Surface API/transport errors in the chat window rather than crashing
        # the Gradio worker.
        yield f"❌ Error: {str(e)}"


# Create the Gradio interface
with gr.Blocks(css=CSS, theme="soft") as demo:
    # Header section
    gr.HTML(TITLE)
    gr.HTML(SUB_TITLE.format(today_date=today.strftime('%B %d, %Y')))
    gr.DuplicateButton(value="Duplicate Space", elem_classes="duplicate-button")

    # Create model selection with descriptions
    model_options = list(MODEL_CONFIGS.keys())
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=model_options,
            value=model_options[0],
            label="Select Falcon-H1 Model",
            info="Choose which model checkpoint to use",
        )

    # Chatbot with LaTeX rendering for both inline and display math.
    chatbot = gr.Chatbot(
        height=600,
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},    # For display mode math
            {"left": "$", "right": "$", "display": False},     # For inline math
            {"left": "\\(", "right": "\\)", "display": False}, # Common inline delimiters
            {"left": "\\[", "right": "\\]", "display": True},  # Common display delimiters
        ],
    )

    # Message input area with a cleaner layout
    with gr.Row():
        with gr.Column(scale=0.85):
            msg = gr.Textbox(
                scale=1,
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
            )
        with gr.Column(scale=0.15, min_width=0):
            submit_btn = gr.Button("Submit", variant="primary")

    # Generation parameters, collapsed by default.
    with gr.Accordion("⚙️ Parameters", open=False, elem_classes="accordion"):
        max_new_tokens = gr.Slider(
            minimum=64, maximum=4096 * 8, value=1024, step=64,
            label="Max new tokens", info="Maximum length of generated response",
        )
        top_p = gr.Slider(
            minimum=0, maximum=1, value=1.0, step=0.05,
            label="top_p", info="1.0 means no filtering",
        )
        top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")
        presence_penalty = gr.Slider(
            minimum=0, maximum=2, value=1.2, step=0.1,
            label="Presence penalty", info="Penalizes repetition",
        )

    # Examples section
    gr.Examples(
        examples=[
            ["Hello there, can you suggest a few places to visit in UAE?"],
            ["What is UAE known for?"],
        ],
        inputs=msg,
    )

    def user(user_message, history):
        """Append the user's turn (assistant side pending) and clear the box."""
        return "", history + [[user_message, None]]

    def bot(history, model_choice, max_tokens, top_p_val, top_k_val, penalty):
        """Stream the assistant's reply into the last history entry."""
        temp = 0.1  # fixed temperature for this demo
        user_message = history[-1][0]
        history[-1][1] = ""
        for character in stream_chat(
            user_message,
            history[:-1],
            model_choice,
            temp,
            max_tokens,
            top_p_val,
            top_k_val,
            penalty,
        ):
            history[-1][1] = character
            yield history

    # Set up event handlers: Enter key and Submit button share the same chain.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot,
        [chatbot, model_dropdown, max_new_tokens, top_p, top_k, presence_penalty],
        chatbot,
    )
    submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot,
        [chatbot, model_dropdown, max_new_tokens, top_p, top_k, presence_penalty],
        chatbot,
    )

if __name__ == "__main__":
    demo.launch()