Commit
·
44bb8e2
1
Parent(s):
46c15e8
Remove tabs :(
Browse files
models.py
CHANGED
|
@@ -34,6 +34,7 @@ def modality(row):
|
|
| 34 |
|
| 35 |
supported_revisions = ["27_09_22"]
|
| 36 |
|
|
|
|
| 37 |
def process_dataset(version):
|
| 38 |
# Load dataset at specified revision
|
| 39 |
dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)
|
|
@@ -70,9 +71,15 @@ data["tags"] = data.apply(eval_tags, axis=1)
|
|
| 70 |
total_samples = data.shape[0]
|
| 71 |
st.metric(label="Total models", value=total_samples)
|
| 72 |
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
st.header("Languages info")
|
| 77 |
|
| 78 |
data.loc[data.languages == "False", 'languages'] = None
|
|
@@ -165,7 +172,8 @@ with tab1:
|
|
| 165 |
|
| 166 |
|
| 167 |
|
| 168 |
-
with tab2:
|
|
|
|
| 169 |
st.header("License info")
|
| 170 |
|
| 171 |
no_license_count = data["license"].isna().sum()
|
|
@@ -209,7 +217,8 @@ with tab2:
|
|
| 209 |
d = data["license"].value_counts().rename_axis("license").to_frame('counts').reset_index()
|
| 210 |
st.dataframe(d)
|
| 211 |
|
| 212 |
-
with tab3:
|
|
|
|
| 213 |
st.header("Pipeline info")
|
| 214 |
|
| 215 |
tags = data["tags"].explode()
|
|
@@ -397,7 +406,8 @@ with tab3:
|
|
| 397 |
# todo : add activity metric
|
| 398 |
|
| 399 |
|
| 400 |
-
with tab4:
|
|
|
|
| 401 |
st.header("Discussions Tab info")
|
| 402 |
|
| 403 |
columns_of_interest = ["prs_count", "prs_open", "prs_merged", "prs_closed", "discussions_count", "discussions_open", "discussions_closed"]
|
|
@@ -424,7 +434,8 @@ with tab4:
|
|
| 424 |
filtered_data = data[["repo_id", "prs_count", "prs_open", "prs_merged", "prs_closed", "discussions_count", "discussions_open", "discussions_closed"]].sort_values("prs_count", ascending=False).reset_index(drop=True)
|
| 425 |
st.dataframe(filtered_data)
|
| 426 |
|
| 427 |
-
with tab5:
|
|
|
|
| 428 |
st.header("Library info")
|
| 429 |
|
| 430 |
no_library_count = data["library"].isna().sum()
|
|
@@ -490,7 +501,8 @@ with tab5:
|
|
| 490 |
filtered_data = filtered_data[columns_of_interest]
|
| 491 |
st.dataframe(filtered_data)
|
| 492 |
|
| 493 |
-
with tab6:
|
|
|
|
| 494 |
st.header("Model cards")
|
| 495 |
|
| 496 |
columns_of_interest = ["has_model_index", "has_metadata", "has_text", "text_length"]
|
|
@@ -533,7 +545,8 @@ with tab6:
|
|
| 533 |
y=alt.X('tag', sort=None)
|
| 534 |
))
|
| 535 |
|
| 536 |
-
with tab7:
|
|
|
|
| 537 |
st.header("Authors")
|
| 538 |
st.text("This info corresponds to the repos owned by the authors")
|
| 539 |
authors = data.groupby("author").sum().drop(["text_length", "Unnamed: 0", "language_count"], axis=1).sort_values("downloads_30d", ascending=False)
|
|
@@ -543,7 +556,8 @@ with tab7:
|
|
| 543 |
)
|
| 544 |
st.dataframe(final_data)
|
| 545 |
|
| 546 |
-
with tab8:
|
|
|
|
| 547 |
st.header("Raw Data")
|
| 548 |
d = data.astype(str)
|
| 549 |
st.dataframe(d)
|
|
|
|
| 34 |
|
| 35 |
supported_revisions = ["27_09_22"]
|
| 36 |
|
| 37 |
+
@st.cache(allow_output_mutation=True)
|
| 38 |
def process_dataset(version):
|
| 39 |
# Load dataset at specified revision
|
| 40 |
dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)
|
|
|
|
| 71 |
total_samples = data.shape[0]
|
| 72 |
st.metric(label="Total models", value=total_samples)
|
| 73 |
|
| 74 |
+
# Tabs don't work in Spaces st version
|
| 75 |
+
#tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs(["Language", "License", "Pipeline", "Discussion Features", "Libraries", "Model Cards", "Super users", "Raw Data"])
|
| 76 |
|
| 77 |
+
tab = st.selectbox(
|
| 78 |
+
'Topic of interest',
|
| 79 |
+
["Language", "License", "Pipeline", "Discussion Features", "Libraries", "Model Cards", "Super users", "Raw Data"])
|
| 80 |
+
|
| 81 |
+
# with tab1:
|
| 82 |
+
if tab == "Language":
|
| 83 |
st.header("Languages info")
|
| 84 |
|
| 85 |
data.loc[data.languages == "False", 'languages'] = None
|
|
|
|
| 172 |
|
| 173 |
|
| 174 |
|
| 175 |
+
#with tab2:
|
| 176 |
+
if tab == "License":
|
| 177 |
st.header("License info")
|
| 178 |
|
| 179 |
no_license_count = data["license"].isna().sum()
|
|
|
|
| 217 |
d = data["license"].value_counts().rename_axis("license").to_frame('counts').reset_index()
|
| 218 |
st.dataframe(d)
|
| 219 |
|
| 220 |
+
#with tab3:
|
| 221 |
+
if tab == "Pipeline":
|
| 222 |
st.header("Pipeline info")
|
| 223 |
|
| 224 |
tags = data["tags"].explode()
|
|
|
|
| 406 |
# todo : add activity metric
|
| 407 |
|
| 408 |
|
| 409 |
+
#with tab4:
|
| 410 |
+
if tab == "Discussion Features":
|
| 411 |
st.header("Discussions Tab info")
|
| 412 |
|
| 413 |
columns_of_interest = ["prs_count", "prs_open", "prs_merged", "prs_closed", "discussions_count", "discussions_open", "discussions_closed"]
|
|
|
|
| 434 |
filtered_data = data[["repo_id", "prs_count", "prs_open", "prs_merged", "prs_closed", "discussions_count", "discussions_open", "discussions_closed"]].sort_values("prs_count", ascending=False).reset_index(drop=True)
|
| 435 |
st.dataframe(filtered_data)
|
| 436 |
|
| 437 |
+
#with tab5:
|
| 438 |
+
if tab == "Libraries":
|
| 439 |
st.header("Library info")
|
| 440 |
|
| 441 |
no_library_count = data["library"].isna().sum()
|
|
|
|
| 501 |
filtered_data = filtered_data[columns_of_interest]
|
| 502 |
st.dataframe(filtered_data)
|
| 503 |
|
| 504 |
+
#with tab6:
|
| 505 |
+
if tab == "Model Cards":
|
| 506 |
st.header("Model cards")
|
| 507 |
|
| 508 |
columns_of_interest = ["has_model_index", "has_metadata", "has_text", "text_length"]
|
|
|
|
| 545 |
y=alt.X('tag', sort=None)
|
| 546 |
))
|
| 547 |
|
| 548 |
+
#with tab7:
|
| 549 |
+
if tab == "Super users":
|
| 550 |
st.header("Authors")
|
| 551 |
st.text("This info corresponds to the repos owned by the authors")
|
| 552 |
authors = data.groupby("author").sum().drop(["text_length", "Unnamed: 0", "language_count"], axis=1).sort_values("downloads_30d", ascending=False)
|
|
|
|
| 556 |
)
|
| 557 |
st.dataframe(final_data)
|
| 558 |
|
| 559 |
+
#with tab8:
|
| 560 |
+
if tab == "Raw Data":
|
| 561 |
st.header("Raw Data")
|
| 562 |
d = data.astype(str)
|
| 563 |
st.dataframe(d)
|