Spaces:

dustalov
/

evalica

Sleeping

App Files Files Community

dustalov committed on Aug 26, 2023

Commit

c4d6746

verified ·

1 Parent(s): 18b2319

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -15

app.py CHANGED Viewed

@@ -24,11 +24,13 @@ import pandas as pd
 # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
-def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], tolerance: float = 10e-6, limit: int = 20) \
-        -> npt.ArrayLike:
     assert wins.shape == ties.shape, 'wins and ties shapes are different'
-    pi, v = np.random.rand(wins.shape[0]), np.random.rand()
     converged, iterations = False, 0
@@ -44,6 +46,7 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
         )
         v = v_numerator / v_denominator
         pi_old = pi.copy()
@@ -60,13 +63,14 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
         )
         pi = pi_numerator / pi_denominator
         converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
     return pi
-def handler(file: typing.IO[bytes]) -> pd.DataFrame:
     try:
         df = pd.read_csv(file.name, dtype=str)
     except ValueError as e:
@@ -87,21 +91,22 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
     df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
                              index='left', columns='right', values='winner',
                              aggfunc='count', fill_value=0)
-    df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0)
     df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
                              index='left', columns='right', values='winner', aggfunc='count',
                              fill_value=0)
-    df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0)
     wins = df_wins.to_numpy(dtype=np.int64)
     ties = df_ties.to_numpy(dtype=np.int64)
     ties += ties.T
-    scores = aggregate(wins, ties)
     df_result = pd.DataFrame(data={'score': scores}, index=index)
-    df_result['rank'] = df_result['score'].rank(ascending=False).astype(int)
     df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
@@ -110,25 +115,33 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
 iface = gr.Interface(
     fn=handler,
-    inputs=gr.File(
-        value='example.csv',
-        file_types=['.tsv', '.csv']
-    ),
     outputs=gr.Dataframe(
-        headers=['item', 'score', 'rank']
     ),
     title='Turn Your Side-by-Side Comparisons into Ranking!',
     description='''
 This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
-As an input, it expects a comma-separated (CSV) file containing the following columns:
 - `left`: the first compared item
 - `right`: the second compared item
 - `winner`: the label indicating the winning item
 Possible values for `winner` are `left`, `right`, or `tie`.
-The provided example might be a good starting point of the format.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
     ''',

 # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
+def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
+              seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.ArrayLike:
     assert wins.shape == ties.shape, 'wins and ties shapes are different'
+    rng = np.random.default_rng(seed)
+    pi, v = rng.random(wins.shape[0]), rng.random()
     converged, iterations = False, 0
         )
         v = v_numerator / v_denominator
+        v = np.nan_to_num(v, copy=False)
         pi_old = pi.copy()
         )
         pi = pi_numerator / pi_denominator
+        pi = np.nan_to_num(pi, copy=False)
         converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
     return pi
+def handler(file: typing.IO[bytes], seed: int) -> pd.DataFrame:
     try:
         df = pd.read_csv(file.name, dtype=str)
     except ValueError as e:
     df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
                              index='left', columns='right', values='winner',
                              aggfunc='count', fill_value=0)
+    df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0, copy=False)
     df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
                              index='left', columns='right', values='winner', aggfunc='count',
                              fill_value=0)
+    df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0, copy=False)
     wins = df_wins.to_numpy(dtype=np.int64)
     ties = df_ties.to_numpy(dtype=np.int64)
     ties += ties.T
+    scores = aggregate(wins, ties, seed=seed)
     df_result = pd.DataFrame(data={'score': scores}, index=index)
+    df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
+    df_result.fillna(np.NINF, inplace=True)
     df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
     df_result.reset_index(inplace=True)
 iface = gr.Interface(
     fn=handler,
+    inputs=[
+        gr.File(
+            value='example.csv',
+            file_types=['.tsv', '.csv'],
+            label='Comparisons'
+        ),
+        gr.Number(
+            label='Seed',
+            precision=0
+        )
+    ],
     outputs=gr.Dataframe(
+        headers=['item', 'score', 'rank'],
+        label='Ranking'
     ),
     title='Turn Your Side-by-Side Comparisons into Ranking!',
     description='''
 This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
+As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
 - `left`: the first compared item
 - `right`: the second compared item
 - `winner`: the label indicating the winning item
 Possible values for `winner` are `left`, `right`, or `tie`.
+The provided example might be a good starting point.
 As the output, this tool provides a table with items, their estimated scores, and ranks.
     ''',