# Spaces: Running on Zero
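The example below is a small Gradio app for a Space: it authenticates with the Hub, creates an `InferenceClient` per model, and lets users compare streamed responses from two selected models side by side, with simple vote counters.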
```python
import os

import gradio as gr
import spaces  # needed on ZeroGPU Spaces; see the note after this block
from huggingface_hub import InferenceClient, login

# Authenticate with the Hugging Face Hub using the Space's TOKEN secret
api_key = os.getenv("TOKEN")
if api_key:
    login(api_key)

# Predefined list of models to compare (can be expanded)
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}

# One inference client per model
clients = {name: InferenceClient(model_id) for name, model_id in model_options.items()}
```
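The `spaces` import is only exercised when a Space actually runs a model on ZeroGPU hardware; everything in this app goes through `InferenceClient`, so the generation happens on remote servers. In a Space that loads a model locally, the GPU-bound function is decorated with `@spaces.GPU`, which attaches a GPU for the duration of the call. A minimal sketch, assuming a local `transformers` pipeline (the checkpoint is just an example):

```python
import spaces
from transformers import pipeline

# Example checkpoint; any text-generation model works the same way
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct")

@spaces.GPU  # ZeroGPU allocates a GPU only while this function runs
def generate(prompt: str) -> str:
    return pipe(prompt, max_new_tokens=64)[0]["generated_text"]
```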
The `respond` helper builds the chat history and streams a completion from each selected model:

```python
# Stream a completion from each selected model
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    messages = [{"role": "system", "content": system_message}] + history
    messages.append({"role": "user", "content": message})

    responses = {}
    for model_name in selected_models:
        client = clients[model_name]
        response = ""
        for chunk in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            delta = chunk.choices[0].delta.content
            if delta:  # the final chunk's delta can be None
                response += delta
        responses[model_name] = response
    return responses
```
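For a quick sanity check, `respond` can also be called outside the UI. The prompt below is a hypothetical example, and a valid `TOKEN` with access to both models is assumed:

```python
answers = respond(
    "What is nucleus sampling?",  # hypothetical example prompt
    history=[],
    system_message="You are a helpful assistant.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
    selected_models=["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"],
)
for name, text in answers.items():
    print(f"--- {name} ---\n{text}\n")
```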
The Gradio UI wires everything together. Since exactly two models are compared, the two response columns are created up front and relabeled on selection; components created inside a callback would not be added to the layout:

```python
# Build the Gradio app
def create_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool")
        gr.Markdown(
            """
            Compare responses from two AI models side by side.
            Select two models, ask a question, and compare their responses in real time!
            """
        )

        # Input section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message",
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

        # Model selection: CheckboxGroup has no maximum-choices option, so the
        # two-model limit is enforced in the callbacks below
        default_models = ["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"]
        with gr.Row():
            selected_models = gr.CheckboxGroup(
                choices=list(model_options.keys()),
                label="Select two models to compare",
                value=default_models,
            )

        # Two fixed response columns, relabeled whenever the selection changes
        response_boxes, vote_buttons, vote_counts = [], [], []
        with gr.Row():
            for i in range(2):
                with gr.Column():
                    response_boxes.append(
                        gr.Textbox(label=f"Response from {default_models[i]}", interactive=False)
                    )
                    vote_buttons.append(gr.Button(f"Vote for response {i + 1}"))
                    vote_counts.append(gr.Number(value=0, label="Votes"))

        submit_button = gr.Button("Generate Responses")
        vote_state = gr.State([0] * len(model_options))  # one vote counter per model

        # Generate responses for the two selected models
        def generate_responses(message, system_message, max_tokens, temperature, top_p, selected):
            if len(selected) != 2:
                raise gr.Error("Please select exactly two models to compare.")
            responses = respond(message, [], system_message, max_tokens, temperature, top_p, selected)
            return [responses[name] for name in selected]

        submit_button.click(
            generate_responses,
            inputs=[user_message, system_message, max_tokens, temperature, top_p, selected_models],
            outputs=response_boxes,
        )

        # Relabel the response boxes when the model selection changes
        def relabel_boxes(selected):
            labels = (list(selected) + ["(none)", "(none)"])[:2]
            return [gr.update(label=f"Response from {label}") for label in labels]

        selected_models.change(relabel_boxes, inputs=[selected_models], outputs=response_boxes)

        # Record a vote for the model shown in column i
        def handle_vote(votes, selected, i):
            if len(selected) != 2:
                raise gr.Error("Select two models before voting.")
            index = list(model_options.keys()).index(selected[i])
            votes[index] += 1
            return votes, votes[index]

        for i, (button, count) in enumerate(zip(vote_buttons, vote_counts)):
            button.click(
                lambda votes, selected, i=i: handle_vote(votes, selected, i),
                inputs=[vote_state, selected_models],
                outputs=[vote_state, count],
            )

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
```
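To run this on a Space, commit the script as `app.py` and add your Hugging Face token as a secret named `TOKEN` in the Space settings; `os.getenv("TOKEN")` picks it up at startup. Depending on your account, some of the larger checkpoints in `model_options` may not be available through the serverless Inference API.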