Spaces:

llmonitor
/

benchmarks

Build error

App Files Files Community

vincelwt commited on Oct 8, 2023

Commit

34dd66d

unverified ·

1 Parent(s): 12e3264

add other url

Browse files

Files changed (1) hide show

app/leaderboard/page.js +63 -0

app/leaderboard/page.js ADDED Viewed

	@@ -0,0 +1,63 @@

+import db, { getModels } from "@/utils/db"
+import Link from "next/link"
+export default async function Leaderboard() {
+  const [potentialPoints] = await db`SELECT SUM(points) as total FROM rubrics`
+  const models = await getModels()
+  return (
+    <>
+      <p>
+        Traditional LLMs benchmarks have drawbacks: they quickly become part of
+        training datasets and are hard to relate to in terms of real-world
+        use-cases.
+      </p>
+      <p>
+        I made this as an experiment to address these issues. Here, the dataset
+        is dynamic (changes every week) and composed of crowdsourced real-world
+        prompts.
+      </p>
+      <p>
+        We then use GPT-4 to grade each model's response against a set of
+        rubrics (more details on the about page). The prompt dataset is easily
+        explorable.
+      </p>
+      <p>
+        Everything is then stored in a Postgres database and this page shows the
+        raw results.
+      </p>
+      <br />
+      <table style={{ maxWidth: 600 }}>
+        <thead>
+          <tr>
+            <th width={70}>Rank</th>
+            <th width={250}>Model</th>
+            <th>Score</th>
+            <th>Results</th>
+          </tr>
+        </thead>
+        <tbody>
+          {models
+            .filter((s) => s.total_score)
+            .map((model, i) => (
+              <tr key={i}>
+                <td>{model.rank}</td>
+                <td>{model.name}</td>
+                <td>
+                  {parseInt((model.total_score / potentialPoints.total) * 100)}
+                </td>
+                <td>
+                  <Link
+                    href={`/${model.api_id.split("/").pop().toLowerCase()}`}
+                  >
+                    view
+                  </Link>
+                </td>
+              </tr>
+            ))}
+        </tbody>
+      </table>
+    </>
+  )
+}