# TuRTLe-Leaderboard / utils.py
import re
import sys

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI, DISCARDED_MODELS


def model_hyperlink(link, model_name, release, thinking=False):
    """Render a model name as an HTML link, appending 'new' and 'reasoning' badges.

    `thinking` is the string "Reasoning" for reasoning models; `release` is the
    release tag, with "V4" being the latest.
    """
    ret = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    new_badge = ' <span class="badge new-badge">new</span>'
    reasoning_badge = ' <span class="badge reasoning-badge">reasoning</span>'
    if release == "V4":
        # show the 'new' badge only for models from the latest release
        return ret + reasoning_badge + new_badge if thinking == "Reasoning" else ret + new_badge
    else:
        return ret + reasoning_badge if thinking == "Reasoning" else ret
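
# A minimal usage sketch (hypothetical URL and model name); with release "V4"
# the 'new' badge is appended, and thinking == "Reasoning" also adds the
# reasoning badge, in that order:
#   model_hyperlink("https://hf.co/org/model", "model", "V4", "Reasoning")
#   -> '<a ...>model</a> <span ...>reasoning</span> <span ...>new</span>'
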
def extract_name_from_link(html: str) -> str:
"""
Extracts the model name from the HTML generated by model_hyperlink()
"""
if not isinstance(html, str):
return html
match = re.search(r'<a[^>]*>(.*?)</a>', html)
if match:
return match.group(1).strip()
return re.sub(r'<[^>]+>', '', html).strip()
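
# For example (hypothetical markup), both paths recover the bare name:
#   extract_name_from_link('<a href="https://example.com">SomeModel</a>')  -> 'SomeModel'
#   extract_name_from_link('<b>SomeModel</b>')                             -> 'SomeModel'
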
def handle_special_cases(benchmark, metric):
    """Keep the benchmark and metric selections consistent: RTL-Repo is the only
    benchmark scored with Exact Matching, so choosing either one forces the other."""
    if metric == "Exact Matching (EM)":
        benchmark = "RTL-Repo"
    elif benchmark == "RTL-Repo":
        metric = "Exact Matching (EM)"
    return benchmark, metric
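
# For instance (hypothetical selection coming from the UI dropdowns):
#   handle_special_cases("VerilogEval", "Exact Matching (EM)")
#   -> ("RTL-Repo", "Exact Matching (EM)")
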
def filter_RTLRepo(subset: pd.DataFrame, name: str = "") -> pd.DataFrame:
    if subset.empty:
        return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
    # negative scores flag missing or invalid runs, so drop them
    subset = subset.drop(subset[subset.Score < 0.0].index)
    # the frame may have become empty after filtering
    if subset.empty:
        return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
details = subset[["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]].drop_duplicates(
"Model"
)
filtered_df = subset[["Model", "Score"]].rename(columns={"Score": "Exact Matching (EM)"})
filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
filtered_df["Model"] = filtered_df.apply(
lambda row: model_hyperlink(
row["Model URL"],
row["Model"],
row["Release"],
),
axis=1,
)
filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
filtered_df = filtered_df[["Type", "Model", "Params", "Exact Matching (EM)"]]
filtered_df = filtered_df.sort_values(by="Exact Matching (EM)", ascending=False).reset_index(drop=True)
    if name == "Other Models":
        filtered_df["Date Discarded"] = filtered_df["Model"].apply(
            lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")
        )
        # reorder to put Date Discarded between Params and Exact Matching (EM);
        # note the column is named "Params" here, not "Parameters (B)"
        cols = ["Type", "Model", "Params", "Date Discarded", "Exact Matching (EM)"]
        filtered_df = filtered_df[[c for c in cols if c in filtered_df.columns]]
return filtered_df
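
# A quick sketch of the expected input, with made-up rows (the real frame comes
# from the leaderboard's results loader):
#   df = pd.DataFrame({
#       "Model": ["m1"], "Model URL": ["https://example.com/m1"],
#       "Model Type": ["Open"], "Params": [7.0], "Release": ["V4"],
#       "Thinking": ["No"], "Score": [42.0],
#   })
#   filter_RTLRepo(df)  # -> one row: Type, Model (as an HTML link), Params, EM
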
def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None, name: str = "") -> pd.DataFrame:
if subset.empty:
return pd.DataFrame(columns=COLUMN_ORDER)
details = subset[["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]].drop_duplicates(
"Model"
)
if "RTLLM" in subset["Benchmark"].unique():
pivot_df = (
subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_s2r)
.reset_index()
.round(2)
)
else:
pivot_df = (
subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_cc)
.reset_index()
.round(2)
)
# if df_agg is not None and agg_column is not None and agg_column in df_agg.columns:
# agg_data = df_agg[["Model", agg_column]].rename(
# columns={agg_column: "Aggregated ⬆️"}
# )
# pivot_df = pd.merge(pivot_df, agg_data, on="Model", how="left")
# else: # fallback
# pivot_df["Aggregated ⬆️"] = pivot_df.mean(axis=1, numeric_only=True).round(2)
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
pivot_df["Model"] = pivot_df.apply(
lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]),
axis=1,
)
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]):
pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2)
pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True)
pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]]
if "Functionality" in pivot_df.columns:
pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
    if name == "Other Models":
        pivot_df["Date Discarded"] = pivot_df["Model"].apply(
            lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")
        )
        # reorder to put Date Discarded between Parameters (B) and Syntax
        cols = ["Type", "Model", "Parameters (B)", "Date Discarded", "Syntax", "Functionality", "Synthesis", "Post-Synthesis"]
        pivot_df = pivot_df[[c for c in cols if c in pivot_df.columns]]
return pivot_df


def custom_agg_s2r(vals):
    """Weighted average of the VerilogEval Spec-to-RTL and RTLLM scores for one
    (model, metric) cell.

    The weights presumably reflect each benchmark's problem count; a lone score
    is passed through unchanged.
    """
    if len(vals) == 2:
        # assumes row order puts the VerilogEval S2R score first and RTLLM second
        s2r_val = vals.iloc[0]
        rtllm_val = vals.iloc[1]
        w1 = 155
        w2 = 47
        result = (w1 * s2r_val + w2 * rtllm_val) / (w1 + w2)
    else:
        result = vals.iloc[0]
    return round(result, 2)
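
# Worked example with made-up scores 80.0 (S2R) and 60.0 (RTLLM):
#   (155 * 80.0 + 47 * 60.0) / (155 + 47) = 15220.0 / 202 ≈ 75.35
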
def custom_agg_cc(vals):
    """Weighted average of the VerilogEval Code-Completion and VeriGen scores for
    one (model, metric) cell.

    As in custom_agg_s2r, the weights presumably reflect problem counts; a lone
    score is passed through unchanged.
    """
    if len(vals) == 2:
        # assumes row order puts the VerilogEval CC score first and VeriGen second
        veval_val = vals.iloc[0]
        vgen_val = vals.iloc[1]
        w1 = 155
        w2 = 17
        result = (w1 * veval_val + w2 * vgen_val) / (w1 + w2)
    else:
        result = vals.iloc[0]
    return round(result, 2)
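
# Worked example with made-up scores 80.0 (VerilogEval CC) and 60.0 (VeriGen):
#   (155 * 80.0 + 17 * 60.0) / (155 + 17) = 13420.0 / 172 ≈ 78.02
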
def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None, name: str = "") -> pd.DataFrame:
    """Same pipeline as filter_bench, minus the (currently commented-out) merge
    with the per-benchmark aggregate column."""
if subset.empty:
return pd.DataFrame(columns=COLUMN_ORDER)
details = subset[["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]].drop_duplicates(
"Model"
)
if "RTLLM" in subset["Benchmark"].unique():
pivot_df = (
subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_s2r)
.reset_index()
.round(2)
)
else:
pivot_df = (
subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_cc)
.reset_index()
.round(2)
)
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
pivot_df["Model"] = pivot_df.apply(
lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]),
axis=1,
)
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]):
pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2)
pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True)
pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]]
if "Functionality" in pivot_df.columns:
pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True)
    if name == "Other Models":
        pivot_df["Date Discarded"] = pivot_df["Model"].apply(
            lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")
        )
        # reorder to put Date Discarded between Parameters (B) and Syntax
        cols = ["Type", "Model", "Parameters (B)", "Date Discarded", "Syntax", "Functionality", "Synthesis", "Post-Synthesis"]
        pivot_df = pivot_df[[c for c in cols if c in pivot_df.columns]]
return pivot_df
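

# A minimal smoke test, assuming the column layout sketched above. The toy
# scores and model metadata are made up, and the printed columns depend on the
# real COLUMN_ORDER / COLUMN_MAPPINGS constants from config.constants;
# filter_bench behaves identically on this input.
if __name__ == "__main__":
    toy = pd.DataFrame(
        {
            "Model": ["m1", "m1"],
            "Model URL": ["https://example.com/m1"] * 2,
            "Model Type": ["Open"] * 2,
            "Params": [7.0] * 2,
            "Release": ["V4"] * 2,
            "Thinking": ["No"] * 2,
            "Benchmark": ["VerilogEval S2R", "RTLLM"],
            "Metric": ["Functionality"] * 2,
            "Score": [80.0, 60.0],
        }
    )
    # the two rows collapse to one weighted cell: (155*80 + 47*60) / 202 ≈ 75.35
    print(filter_bench_all(toy))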