Update app.py
app.py CHANGED
@@ -171,7 +171,21 @@ with tab1:
     # st.markdown('<div class="title">Leaderboard</div>', unsafe_allow_html=True)
     st.markdown('<div class="tab-content">', unsafe_allow_html=True)

-    st.markdown('
+    st.markdown('Metrics Explanation')
+    st.markdown('''
+        <div class="metric">
+        <br/>
+        <p style="font-size:16px;">
+        <strong>Factual Precision</strong> measures the ratio of supported units to all units, averaged over model responses. <strong>Hallucination Score</strong> measures the degree of incorrect or inconclusive content units in a model response, with details provided in the paper. We also report the average number of unsupported units (<strong>Avg. Unsupported</strong>), the average number of units labelled as undecided (<strong>Avg. Undecided</strong>), the average response length in tokens, and the average number of verifiable units in the model responses.
+        </p>
+        <p style="font-size:16px;">
+        🔒 for closed LLMs; 🔑 for open-weights LLMs; 🚨 for newly added models
+        </p>
+        </div>
+        ''',
+        unsafe_allow_html=True
+    )
+
     st.markdown('@Farima populate here')

     st.markdown("""
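For reference, the paragraph added above defines Factual Precision as the ratio of supported units to all units, averaged over model responses. A minimal sketch of that computation, assuming per-unit labels of "supported" / "unsupported" / "undecided" (the function name and label strings are illustrative, not taken from this repository):

def factual_precision(responses):
    """Average, over responses, of (# supported units) / (# units).

    responses: list of responses, each a list of per-unit labels such as
    "supported", "unsupported", or "undecided".
    """
    per_response = [
        sum(label == "supported" for label in units) / len(units)
        for units in responses
        if units  # skip responses with no verifiable units
    ]
    return sum(per_response) / len(per_response) if per_response else 0.0


# Example: one fully supported response and one with mixed labels.
print(factual_precision([
    ["supported", "supported"],                 # 2/2 = 1.0
    ["supported", "undecided", "unsupported"],  # 1/3 ≈ 0.33
]))  # ≈ 0.67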