import sqlite3
import time

from queriers import together, cohere, openai_func, openrouter, ai21, alephalpha
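
# Every querier imported above is assumed to share one call shape: it takes
# the model row and the prompt row (as dicts) and returns the raw response
# text. This interface is inferred from how ask_prompt() invokes them below;
# the actual signatures live in the queriers module.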
db = sqlite3.connect("./database.db")
db.row_factory = sqlite3.Row  # rows behave like dicts, keyed by column name
cursor = db.cursor()
def remove_end(s, suffix):
    # Strip a trailing stop sequence; guard against an empty suffix, which
    # would otherwise slice the whole string away (s[:-0] == "")
    if suffix and s.endswith(suffix):
        return s[: -len(suffix)]
    return s
# Fetch models
models = cursor.execute("SELECT * FROM models").fetchall()
models = [dict(model) for model in models]

# Fetch prompts
prompts = cursor.execute("SELECT * FROM prompts").fetchall()
prompts = [dict(prompt) for prompt in prompts]
def insert_result(modelId, promptId, result, duration, rate):
    cursor.execute(
        "INSERT INTO results (model, prompt, result, duration, rate) VALUES (?, ?, ?, ?, ?)",
        (modelId, promptId, result, duration, rate),
    )
    db.commit()
def check_if_results_exist(modelId, promptId):
    row = cursor.execute(
        "SELECT 1 FROM results WHERE model = ? AND prompt = ? LIMIT 1",
        (modelId, promptId),
    ).fetchone()
    return row is not None
def ask_prompt(prompt, model):
    if check_if_results_exist(model["id"], prompt["id"]):
        print("Skipping, benchmark result already recorded")
        return

    # Map each API name to its querier function
    mapping = {
        "together": together,
        "cohere": cohere,
        "openai": openai_func,
        "openrouter": openrouter,
        "ai21": ai21,
        # "alephalpha": alephalpha,  # TODO: get a working API key
    }

    querier = mapping.get(model["api"])
    if not querier:
        print(f"No querier for {model['api']}")
        return
| print(f"Querying {model['name']}") | |
| start_time = time.time() | |
| try: | |
| response_text = querier(model, prompt) | |
| # Remove newlines and trailing spaces + stop sequence | |
| cleaned = response_text.strip() | |
| if prompt["stop"]: | |
| cleaned = remove_end(cleaned, prompt["stop"]) | |
| end_time = time.time() | |
| duration = end_time - start_time | |
| chars_per_second = round(len(response_text) / duration, 2) | |
| print("------------------------------------") | |
| print(f"Result: {cleaned}") | |
| print(f"Took {duration*1000} ms ({chars_per_second} chars/s)") | |
| print("------------------------------------") | |
| insert_result(model["id"], prompt["id"], cleaned, duration*1000, chars_per_second) | |
| except Exception as e: | |
| print(f"Error querying {model['name']}", e) | |
total_benchmarks = len(models) * len(prompts)
print(f"Running up to {total_benchmarks} benchmarks")

for model in models:
    # This pass only benchmarks code models; skip general language models
    if model["type"] == "language":
        continue
    for prompt in prompts:
        if prompt["type"] != "code" and model["type"] == "code":
            print("Skipping non-code benchmark for code model")
            continue
        ask_prompt(prompt, model)

db.close()
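
The script expects ./database.db to already exist with populated models and prompts tables. Below is a minimal sketch of a schema that would satisfy every query above; it is inferred from usage rather than taken from the repository, and columns the script never touches directly (notably the prompt body, assumed here as a text column) are guesses.

# sketch: a schema compatible with the benchmark script above; column names
# beyond those the script reads (id, name, api, type, stop, ...) are assumptions
import sqlite3

db = sqlite3.connect("./database.db")
db.executescript(
    """
    CREATE TABLE IF NOT EXISTS models (
        id   INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        api  TEXT NOT NULL,   -- e.g. 'together', 'openai', 'openrouter'
        type TEXT NOT NULL    -- 'language' or 'code'
    );
    CREATE TABLE IF NOT EXISTS prompts (
        id   INTEGER PRIMARY KEY,
        text TEXT,            -- assumed: the prompt body consumed by the queriers
        type TEXT,            -- 'code' prompts are the only ones run on code models
        stop TEXT             -- optional stop sequence trimmed from results
    );
    CREATE TABLE IF NOT EXISTS results (
        model    INTEGER REFERENCES models (id),
        prompt   INTEGER REFERENCES prompts (id),
        result   TEXT,
        duration REAL,        -- milliseconds
        rate     REAL         -- characters per second
    );
    """
)
db.close()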