Update app.py
Browse files
app.py
CHANGED
|
@@ -24,11 +24,13 @@ import pandas as pd
|
|
| 24 |
|
| 25 |
|
| 26 |
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
|
| 27 |
-
def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
|
| 28 |
-
|
| 29 |
assert wins.shape == ties.shape, 'wins and ties shapes are different'
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
|
| 33 |
converged, iterations = False, 0
|
| 34 |
|
|
@@ -44,6 +46,7 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
|
|
| 44 |
)
|
| 45 |
|
| 46 |
v = v_numerator / v_denominator
|
|
|
|
| 47 |
|
| 48 |
pi_old = pi.copy()
|
| 49 |
|
|
@@ -60,13 +63,14 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
|
|
| 60 |
)
|
| 61 |
|
| 62 |
pi = pi_numerator / pi_denominator
|
|
|
|
| 63 |
|
| 64 |
converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
|
| 65 |
|
| 66 |
return pi
|
| 67 |
|
| 68 |
|
| 69 |
-
def handler(file: typing.IO[bytes]) -> pd.DataFrame:
|
| 70 |
try:
|
| 71 |
df = pd.read_csv(file.name, dtype=str)
|
| 72 |
except ValueError as e:
|
|
@@ -87,21 +91,22 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
|
|
| 87 |
df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
|
| 88 |
index='left', columns='right', values='winner',
|
| 89 |
aggfunc='count', fill_value=0)
|
| 90 |
-
df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0)
|
| 91 |
|
| 92 |
df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
|
| 93 |
index='left', columns='right', values='winner', aggfunc='count',
|
| 94 |
fill_value=0)
|
| 95 |
-
df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0)
|
| 96 |
|
| 97 |
wins = df_wins.to_numpy(dtype=np.int64)
|
| 98 |
ties = df_ties.to_numpy(dtype=np.int64)
|
| 99 |
ties += ties.T
|
| 100 |
|
| 101 |
-
scores = aggregate(wins, ties)
|
| 102 |
|
| 103 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
| 104 |
-
df_result['rank'] = df_result['score'].rank(ascending=False).astype(int)
|
|
|
|
| 105 |
df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
|
| 106 |
df_result.reset_index(inplace=True)
|
| 107 |
|
|
@@ -110,25 +115,33 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
|
|
| 110 |
|
| 111 |
iface = gr.Interface(
|
| 112 |
fn=handler,
|
| 113 |
-
inputs=
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
outputs=gr.Dataframe(
|
| 118 |
-
headers=['item', 'score', 'rank']
|
|
|
|
| 119 |
),
|
| 120 |
title='Turn Your Side-by-Side Comparisons into Ranking!',
|
| 121 |
description='''
|
| 122 |
This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
|
| 123 |
|
| 124 |
-
As an input, it expects a comma-separated (CSV) file containing the following columns:
|
| 125 |
|
| 126 |
- `left`: the first compared item
|
| 127 |
- `right`: the second compared item
|
| 128 |
- `winner`: the label indicating the winning item
|
| 129 |
|
| 130 |
Possible values for `winner` are `left`, `right`, or `tie`.
|
| 131 |
-
The provided example might be a good starting point
|
| 132 |
|
| 133 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
| 134 |
''',
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
# https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
|
| 27 |
+
def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
|
| 28 |
+
seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.ArrayLike:
|
| 29 |
assert wins.shape == ties.shape, 'wins and ties shapes are different'
|
| 30 |
|
| 31 |
+
rng = np.random.default_rng(seed)
|
| 32 |
+
|
| 33 |
+
pi, v = rng.random(wins.shape[0]), rng.random()
|
| 34 |
|
| 35 |
converged, iterations = False, 0
|
| 36 |
|
|
|
|
| 46 |
)
|
| 47 |
|
| 48 |
v = v_numerator / v_denominator
|
| 49 |
+
v = np.nan_to_num(v, copy=False)
|
| 50 |
|
| 51 |
pi_old = pi.copy()
|
| 52 |
|
|
|
|
| 63 |
)
|
| 64 |
|
| 65 |
pi = pi_numerator / pi_denominator
|
| 66 |
+
pi = np.nan_to_num(pi, copy=False)
|
| 67 |
|
| 68 |
converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
|
| 69 |
|
| 70 |
return pi
|
| 71 |
|
| 72 |
|
| 73 |
+
def handler(file: typing.IO[bytes], seed: int) -> pd.DataFrame:
|
| 74 |
try:
|
| 75 |
df = pd.read_csv(file.name, dtype=str)
|
| 76 |
except ValueError as e:
|
|
|
|
| 91 |
df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
|
| 92 |
index='left', columns='right', values='winner',
|
| 93 |
aggfunc='count', fill_value=0)
|
| 94 |
+
df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0, copy=False)
|
| 95 |
|
| 96 |
df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
|
| 97 |
index='left', columns='right', values='winner', aggfunc='count',
|
| 98 |
fill_value=0)
|
| 99 |
+
df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0, copy=False)
|
| 100 |
|
| 101 |
wins = df_wins.to_numpy(dtype=np.int64)
|
| 102 |
ties = df_ties.to_numpy(dtype=np.int64)
|
| 103 |
ties += ties.T
|
| 104 |
|
| 105 |
+
scores = aggregate(wins, ties, seed=seed)
|
| 106 |
|
| 107 |
df_result = pd.DataFrame(data={'score': scores}, index=index)
|
| 108 |
+
df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
|
| 109 |
+
df_result.fillna(np.NINF, inplace=True)
|
| 110 |
df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
|
| 111 |
df_result.reset_index(inplace=True)
|
| 112 |
|
|
|
|
| 115 |
|
| 116 |
iface = gr.Interface(
|
| 117 |
fn=handler,
|
| 118 |
+
inputs=[
|
| 119 |
+
gr.File(
|
| 120 |
+
value='example.csv',
|
| 121 |
+
file_types=['.tsv', '.csv'],
|
| 122 |
+
label='Comparisons'
|
| 123 |
+
),
|
| 124 |
+
gr.Number(
|
| 125 |
+
label='Seed',
|
| 126 |
+
precision=0
|
| 127 |
+
)
|
| 128 |
+
],
|
| 129 |
outputs=gr.Dataframe(
|
| 130 |
+
headers=['item', 'score', 'rank'],
|
| 131 |
+
label='Ranking'
|
| 132 |
),
|
| 133 |
title='Turn Your Side-by-Side Comparisons into Ranking!',
|
| 134 |
description='''
|
| 135 |
This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
|
| 136 |
|
| 137 |
+
As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
|
| 138 |
|
| 139 |
- `left`: the first compared item
|
| 140 |
- `right`: the second compared item
|
| 141 |
- `winner`: the label indicating the winning item
|
| 142 |
|
| 143 |
Possible values for `winner` are `left`, `right`, or `tie`.
|
| 144 |
+
The provided example might be a good starting point.
|
| 145 |
|
| 146 |
As the output, this tool provides a table with items, their estimated scores, and ranks.
|
| 147 |
''',
|