Spaces:
Sleeping
Sleeping
update
Browse files- app.py +40 -2
- benchmark_data.csv +11 -0
app.py
CHANGED
|
@@ -1,4 +1,42 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from fuzzywuzzy import process
|
| 4 |
|
| 5 |
+
|
| 6 |
+
def load_data(path="benchmark_data.csv"):
    """Load the benchmark results table from a CSV file.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
            Defaults to ``"benchmark_data.csv"``, which is resolved against
            the current working directory.

    Returns:
        A ``pandas.DataFrame`` holding the parsed CSV contents.
    """
    return pd.read_csv(path)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def fuzzy_search(data, query, column, limit=10, score_cutoff=60):
    """Return the rows of *data* whose *column* value fuzzily matches *query*.

    Args:
        data: DataFrame to filter.
        query: Search text. A missing, empty, or whitespace-only query means
            "no filter" and *data* is returned unchanged.
        column: Name of the column whose values are matched against *query*.
        limit: Maximum number of distinct column values to keep as matches.
        score_cutoff: Minimum fuzzywuzzy score a value must reach to count as
            a match. Without a cutoff, a column with no more than *limit*
            distinct values would match every row for any query.

    Returns:
        The input DataFrame when there is no query, otherwise a filtered
        DataFrame (possibly empty) containing only the matching rows.
    """
    # A missing or blank query means "no filter".
    if not query or not str(query).strip():
        return data

    values = data[column]
    # fuzzywuzzy scores strings: drop NaN and stringify so that missing or
    # non-text cells cannot break the scorer.
    choices = values.dropna().astype(str).unique().tolist()

    # With a plain list of choices, each result is a (choice, score) pair.
    matches = process.extract(query, choices, limit=limit)
    selected = [choice for choice, score in matches if score >= score_cutoff]

    # Compare on the same string form used for scoring; NaN rows never match.
    mask = values.notna() & values.astype(str).isin(selected)
    return data[mask]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def main():
    """Render the benchmark browser page.

    Loads the benchmark table, narrows it with the optional framework and
    model searches entered in the sidebar, then shows the resulting table
    and, when the sidebar checkbox is ticked, a bar chart of the MRR@10 and
    Hit@10 columns.
    """
    st.title("Multihop-RAG Benchmark Space")

    data = load_data()

    st.sidebar.header("Search Options")
    framework_query = st.sidebar.text_input("Search by Framework")
    model_query = st.sidebar.text_input("Search by Model")
    # Create the checkbox before any early return so it stays in the sidebar
    # even when the current search yields no rows.
    show_distribution = st.sidebar.checkbox("Show Metrics Distribution")

    # fuzzy_search returns its input unchanged for an empty query, so the
    # filters can be applied unconditionally.
    data = fuzzy_search(data, framework_query, 'framework')
    data = fuzzy_search(data, model_query, 'model')

    st.header("Benchmark Results")
    st.write("Displaying results for MRR@10 and Hit@10 across different frameworks, models, and chunk sizes.")

    # Tell the user explicitly when nothing matches instead of rendering an
    # empty table and an empty chart.
    if data.empty:
        st.warning("No benchmark results match the current search.")
        return

    st.dataframe(data)

    if show_distribution:
        st.subheader("Metrics Distribution")
        st.bar_chart(data[['MRR@10', 'Hit@10']])
|
| 40 |
+
|
| 41 |
+
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
benchmark_data.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
framework,model,chunk size,MRR@10,Hit@10
|
| 2 |
+
Transformers,BERT-base,128,0.32,0.45
|
| 3 |
+
Transformers,BERT-large,256,0.37,0.50
|
| 4 |
+
Fairseq,ROBERTA-base,128,0.35,0.48
|
| 5 |
+
Fairseq,ROBERTA-large,256,0.40,0.55
|
| 6 |
+
HuggingFace,GPT-2,128,0.30,0.44
|
| 7 |
+
HuggingFace,GPT-3,256,0.42,0.57
|
| 8 |
+
TensorFlow,T5-small,128,0.31,0.46
|
| 9 |
+
TensorFlow,T5-large,256,0.39,0.51
|
| 10 |
+
PyTorch,BART-base,128,0.33,0.47
|
| 11 |
+
PyTorch,BART-large,256,0.38,0.53
|