Spaces:
Build error
Build error
| from huggingface_hub import HfApi | |
| import pandas as pd | |
| import os | |
| import streamlit as st | |
| import altair as alt | |
| import numpy as np | |
| import datetime | |
| from huggingface_hub import Repository | |
| from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES | |
| from transformers.models.auto.modeling_auto import ( | |
| MODEL_FOR_CTC_MAPPING_NAMES, | |
| MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES, | |
| MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES, | |
| MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES, | |
| MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES, | |
| MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, | |
| MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES, | |
| MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, | |
| MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, | |
| MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES, | |
| MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES, | |
| MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES, | |
| MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES, | |
| MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES, | |
| MODEL_FOR_BACKBONE_MAPPING_NAMES, | |
| MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES, | |
| ) | |
# Architecture names that appear in any of the audio auto-model mappings.
# Mappings are walked in a fixed order and duplicates are kept, so the result
# is the plain concatenation of every mapping's keys.
audio_models = [
    model_type
    for mapping in (
        MODEL_FOR_CTC_MAPPING_NAMES,
        MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
        MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES,
    )
    for model_type in mapping.keys()
]

# Architecture names treated as "vision" by this app. Note that the table,
# visual and document question-answering mappings are counted here as well.
vision_models = [
    model_type
    for mapping in (
        MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
        MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
        MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
        MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
        MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
        MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES,
        MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
        MODEL_FOR_BACKBONE_MAPPING_NAMES,
        MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES,
    )
    for model_type in mapping.keys()
]
# The weekly snapshot file is keyed by ISO week number and ISO year, both
# resolved once when the script starts.
today = datetime.date.today()
year, week, _ = today.isocalendar()

# Dataset repository that is cloned into the local "data" directory below.
DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface/transformers-stats-space-data"

# Snapshot for the current week, located inside the local clone.
DATA_FILENAME = f"data_{week}_{year}.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Token is read from the environment; only its presence is logged, never its value.
HF_TOKEN = os.getenv("HF_TOKEN")
print("is none?", HF_TOKEN is None)
def retrieve_model_stats():
    """Collect per-architecture download statistics from the Hugging Face Hub.

    Every key of ``CONFIG_MAPPING_NAMES`` is used as a ``filter`` for
    ``HfApi.list_models``. For each architecture the number of matching
    models, their summed ``downloads``, the downloads per model, the share of
    all downloads and a modality label ("audio", "vision" or "text") are
    recorded.

    Returns:
        str: CSV text. The first column is headed ``model_names``; the
        remaining columns are ``num_downloads`` (formatted with thousands
        separators), ``%_of_all_downloads``, ``num_models``,
        ``download_per_model`` and ``modality``. Rows are ordered by
        descending share of all downloads.
    """
    hf_api = HfApi()

    # Sets give O(1) membership tests; the source lists contain duplicates.
    audio_names = set(audio_models)
    vision_names = set(vision_models)

    all_stats = {}
    total_downloads = 0

    for model_name in CONFIG_MAPPING_NAMES:
        if model_name in audio_names:
            modality = "audio"
        elif model_name in vision_names:
            modality = "vision"
        else:
            modality = "text"

        models = list(hf_api.list_models(filter=model_name))
        num_models = len(models)
        # `downloads` can be missing or None on a listing entry; count it as 0
        # instead of letting sum() fail on None.
        num_downloads = sum(getattr(m, "downloads", None) or 0 for m in models)

        # Key order defines the CSV column order, so keep it stable.
        all_stats[model_name] = {
            "num_downloads": num_downloads,
            "%_of_all_downloads": 0,
            "num_models": num_models,
            "download_per_model": num_downloads // num_models if num_models else num_downloads,
            "modality": modality,
        }
        total_downloads += num_downloads

    for model_stats in all_stats.values():
        downloads = model_stats["num_downloads"]
        # With no downloads at all the share stays 0 rather than dividing by zero.
        if total_downloads:
            # Round the percentage itself so no float noise is reintroduced
            # by a multiplication after rounding.
            model_stats["%_of_all_downloads"] = round(100 * downloads / total_downloads, 3)
        model_stats["num_downloads"] = f"{downloads:,}"

    # Ascending sort followed by a reversal (rather than reverse=True) keeps the
    # historical order of rows with an equal share.
    ranked = sorted(all_stats.items(), key=lambda item: item[1]["%_of_all_downloads"])
    ranked.reverse()

    dataframe = pd.DataFrame.from_dict(dict(ranked), orient="index")
    # Name the index column in the CSV header.
    return dataframe.to_csv(index_label="model_names")
# Clone the dataset repository into the local "data" directory; the weekly
# snapshot (DATA_FILE) lives inside that clone.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)

if not os.path.isfile(DATA_FILE):
    st.title("You are the first this week!!! Please wait until the new data is generated and written")
    result = retrieve_model_stats()

    # Checked again after the statistics were collected; presumably this guards
    # against the file having been written in the meantime - TODO confirm.
    if not os.path.isfile(DATA_FILE):
        with open(DATA_FILE, "w", encoding="utf-8") as f:
            f.write(result)

        commit_url = repo.push_to_hub()
        print(commit_url)

# read_csv opens the path itself, so no surrounding file handle is needed.
# num_downloads holds thousands-separated text ("1,234"); pin it to str so the
# column is never inferred as integers when no value contains a separator.
dataframe = pd.read_csv(DATA_FILE, dtype={"num_downloads": str})

# Plain integer downloads, in the same row order as the dataframe.
int_downloads = np.array(
    [int(x.replace(",", "")) for x in dataframe["num_downloads"].values]
)
def _parse_downloads(values):
    """Convert thousands-separated download counts (e.g. ``"1,234"``) to an int array."""
    # str() keeps this working when a cell was parsed as a number, not text.
    return np.array([int(str(value).replace(",", "")) for value in values])


def _show_downloads_chart(title, downloads, model_names):
    """Render a titled bar chart of total downloads per model architecture.

    Args:
        title: Heading shown above the chart.
        downloads: Download counts, one per bar.
        model_names: Architecture names, aligned with ``downloads``.
    """
    source = pd.DataFrame(
        {
            "Number of total downloads": downloads,
            "Model architecture name": model_names,
        }
    )
    bar_chart = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            y="Number of total downloads",
            # sort=None keeps the bars in row order instead of re-sorting them.
            x=alt.X("Model architecture name", sort=None),
        )
    )
    st.title(title)
    st.altair_chart(bar_chart, use_container_width=True)


st.title(f"Stats for year {year} and week {week}")

model_names = dataframe["model_names"].values

# The CSV rows are written in descending order of download share, so the
# first and last 20 rows are the most and least downloaded architectures.
_show_downloads_chart("Top 20 downloads last 30 days", int_downloads[:20], model_names[:20])
_show_downloads_chart("Bottom 20 downloads last 30 days", int_downloads[-20:], model_names[-20:])

# Per-modality charts.
df_vision = dataframe[dataframe["modality"] == "vision"]
vision_int_downloads = _parse_downloads(df_vision["num_downloads"].values)
_show_downloads_chart(
    "Vision downloads last 30 days", vision_int_downloads, df_vision["model_names"].values
)

df_audio = dataframe[dataframe["modality"] == "audio"]
audio_int_downloads = _parse_downloads(df_audio["num_downloads"].values)
_show_downloads_chart(
    "Audio downloads last 30 days", audio_int_downloads, df_audio["model_names"].values
)

# Raw tables: everything first, then each modality without its constant column.
st.title("All stats last 30 days")
st.table(dataframe)
st.title("Vision stats last 30 days")
st.table(df_vision.drop("modality", axis=1))
st.title("Audio stats last 30 days")
st.table(df_audio.drop("modality", axis=1))