import gradio as gr
import random
import json
import os
from datetime import datetime


# This would be replaced with your actual SLM integration
def generate_response(query, context, model_name):
    """Placeholder function to generate a response from an SLM."""
    return f"This is a placeholder response from {model_name} based on query: {query} and context: {context}"


def save_evaluation(query, context, model_a, model_b, response_a, response_b, preference):
    """Save evaluation results to a JSON file."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    evaluation = {
        "timestamp": timestamp,
        "query": query,
        "context": context,
        "models": {
            "left": model_a,
            "right": model_b
        },
        "responses": {
            "left": response_a,
            "right": response_b
        },
        "preference": preference
    }
    # Create directory if it doesn't exist
    os.makedirs("evaluations", exist_ok=True)
    # Save to a file
    with open(f"evaluations/eval_{timestamp.replace(' ', '_').replace(':', '-')}.json", "w") as f:
        json.dump(evaluation, f, indent=2)
    return "Evaluation saved successfully!"


def process_query(query, context, model_a="SLM-A", model_b="SLM-B"):
    """Process query and generate responses from two models"""
    # Generate responses
    response_a = generate_response(query, context, model_a)
    response_b = generate_response(query, context, model_b)
    # Randomly swap to avoid position bias
    if random.random() > 0.5:
        return response_a, response_b, model_a, model_b
    else:
        return response_b, response_a, model_b, model_a


def submit_evaluation(query, context, response_left, response_right, preference, model_left, model_right):
    """Submit and save the evaluation"""
    if not preference:
        return "Please select a preference before submitting."
    save_evaluation(query, context, model_left, model_right, response_left, response_right, preference)
    return "Thank you for your evaluation!"


with gr.Blocks(title="SLM-RAG Arena") as app:
    gr.Markdown("# SLM-RAG Arena")
    gr.Markdown("Compare responses from different models for RAG tasks.")

    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(label="Query", placeholder="Enter your query here...")
            context_input = gr.Textbox(label="Context", placeholder="Enter context information here...", lines=5)
            generate_btn = gr.Button("Generate Responses")

    # Hidden state variables
    model_left = gr.State("")
    model_right = gr.State("")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Response A")
            response_left = gr.Textbox(label="", lines=10, interactive=False)
        with gr.Column():
            gr.Markdown("### Response B")
            response_right = gr.Textbox(label="", lines=10, interactive=False)

    with gr.Row():
        preference = gr.Radio(
            choices=["Prefer Left", "Tie", "Prefer Right", "Neither"],
            label="Which response do you prefer?"
        )

    submit_btn = gr.Button("Submit Evaluation")
    result = gr.Textbox(label="Result")

    generate_btn.click(
        process_query,
        inputs=[query_input, context_input],
        outputs=[response_left, response_right, model_left, model_right]
    )

    submit_btn.click(
        submit_evaluation,
        inputs=[query_input, context_input, response_left, response_right, preference, model_left, model_right],
        outputs=[result]
    )

app.launch()
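
# To try the app locally (assuming gradio is installed): `python app.py`.
# On Hugging Face Spaces with the Gradio SDK, a file named app.py is typically
# the default entry point and is launched automatically.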