Spaces:
Runtime error
included up to models architectures
- app.py +260 -16
- task_arch.json +1 -0
app.py CHANGED
@@ -1,24 +1,268 @@
 import streamlit as st
-from transformers import
-st.write(x, 'squared is', x * x)
-model="julien-c/hotdog-not-hotdog")
-st.
-col1, col2 = st.columns(2)
-col1.image(image, use_column_width=True)
-predictions = pipeline(image)
+from transformers import (
+    AutoTokenizer,
+    XLNetTokenizer
+)
+import pathlib
+import json

+st.set_page_config(layout='wide')

+st.title("Transformers Library for NLP Tasks: Structured by Topics")

+st.write("Let's start with the architectures of models")

+neural_net_models = dict({
+    'encoder': "responsible for understanding the input text.",
+    'decoder': "designed to generate new texts answering queries.",
+    'encoder-decoder': "able to understand and generate text & have emergent behaviour",
+    'convolution': "used for image recognition and processing.",
+})
+model_types = list(neural_net_models.keys())

+archs = st.radio("model architectures".capitalize(), model_types)

+st.write(f"{archs.capitalize()} are {neural_net_models[archs]}")

+domains = dict({
+    "computer_vision": {
+        "encoder": ['vit', 'swin', 'segformer', 'beit'],
+        "decoder": ['imagegpt'],
+        "encoder-decoder": ['detr'],
+        "convolution": ['convnext']
+    },
+    "nlp": {
+        "encoder": ["bert", "roberta", "albert", "distilbert",
+                    "deberta", "longformer"],
+        "decoder": ["gpt-2", "xlnet", "gpt-j", "opt", "bloom"],
+        "encoder-decoder": ["bart", "pegasus", "t5"],
+    },
+    "audio": {
+        "encoder": ["wav2vec2", "hubert"],
+        "encoder-decoder": ["speech2text", "whisper"]
+    },
+    "multimodal": {
+        "encoder": ["visualbert", "vilt", "clip", "owl-vit"],
+        "encoder-decoder": ["trocr", "donut"]
+    },
+    "reinforcement": {
+        "decoder": ["trajectory transformer", "decision transformer"]
+    }
+})
+
+st.write("Let's look at the individual domains")
+
+domain_list = list(domains.keys())
+
+doms = st.radio("domains of ai".capitalize(), domain_list)
+
+st.write(domains[doms])
+
+st.write("Now come the Tokenizers, the entry points")
+
+tokenizer_algos = {
+    "byte_pair": {
+        "base": ['gpt', 'gpt-2(byte_level)'],
+        "intro": "https://arxiv.org/abs/1508.07909"
+    },
+    "wordpiece": {
+        "base": ['bert', 'distilbert', 'electra'],
+        "intro": "https://static.googleusercontent.com/media/research.google.com/ja//pubs/archive/37842.pdf"
+    },
+    "unigram": {
+        "base": ['not_used'],
+        "intro": "https://arxiv.org/pdf/1804.10959.pdf"
+    },
+    "sentencepiece": {
+        "base": ["xlm", "albert", "xlnet", "marian", "t5"],
+        "intro": "https://arxiv.org/pdf/1808.06226.pdf"
+    }
+}
+
+tokenizer_items = list(tokenizer_algos.keys())
+
+algos = st.radio("tokenizer algos".capitalize(), tokenizer_items)
+
+st.write(tokenizer_algos[algos])
+
+st.write("""We will work with 3 types of tokenizers on a single sentence
+to see how their outputs differ, by first encoding and then decoding it.""")
+
+st.markdown("""### Models in Review:
+- gpt2
+- bert-base-uncased
+- xlm""")
+
+input_sentence = "This is a sample sentence for testing tokenizers"
+
+gpt2_model = "gpt2"
+bert_model = "bert-base-uncased"
+xlm_model = "xlnet-base-cased"
+
+gpt2_tokenizer = AutoTokenizer.from_pretrained(gpt2_model)
+bert_tokenizer = AutoTokenizer.from_pretrained(bert_model)
+xlm_tokenizer = XLNetTokenizer.from_pretrained(xlm_model)
+
+st.markdown("#### The input sentence is")
+st.write("The Sample Sentence: ", input_sentence)
+
+gpt2_tokenize = gpt2_tokenizer.tokenize(input_sentence)
+bert_tokenize = bert_tokenizer.tokenize(input_sentence)
+xlm_tokenize = xlm_tokenizer.tokenize(input_sentence)
+
+with st.expander(label="Byte Pair Tokenizer", expanded=False):
+    st.write("gpt2_tokenize = gpt2_tokenizer.tokenize(input_sentence)")
+    st.write(gpt2_tokenize)
+with st.expander(label="Word Piece Tokenizer", expanded=False):
+    st.write("bert_tokenize = bert_tokenizer.tokenize(input_sentence)")
+    st.write(bert_tokenize)
+with st.expander(label="SentencePiece Tokenizer", expanded=False):
+    st.write("xlm_tokenize = xlm_tokenizer.tokenize(input_sentence)")
+    st.write(xlm_tokenize)
+
+st.markdown("""#### Tokenizer Options:
+The following parameters of the tokenizer are the most used
+- padding = 'longest'(True), 'max_length', 'do_not_pad'(False)
+- truncation = 'longest_first'(True), 'only_second', 'only_first',
+'do_not_truncate'(False)
+- max_length = <= model_max_length """)
+## Refer to https://huggingface.co/docs/transformers/pad_truncation
+gpt2_max_length = gpt2_tokenizer.model_max_length
+bert_max_length = bert_tokenizer.model_max_length
+xlm_max_length = "Not specified"
+
+st.markdown("""We also need the model max length, which is
+what the model is configured with.""")
+st.write("GPT: ", gpt2_max_length)
+st.write("Bert: ", bert_max_length)
+st.write("XLM: ", xlm_max_length)
+
+sent1 = "This app is talking about the variety of Tokenizers and their outputs"
+sent2 = """Tokenizers do one thing, bring out numbers from text. The better the numbers,
+the better the results"""
+
+st.write("We will be working with the following sentences.")
+st.write("Sentence1: ", sent1)
+st.write("Sentence2: ", sent2)
+
+st.markdown("#### Tokenization in Action. Using GPT Tokenizer")
+st.markdown("""##### Trial-1:
+> No parameter provided
+> Sentences are given with comma separation""")
+gpt2_encode = gpt2_tokenizer(sent1, sent2)
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-2:
+> No parameter provided
+> Sentences are made into a list""")
+gpt2_encode = gpt2_tokenizer([sent1, sent2])
+st.write("gpt2_encode = gpt2_tokenizer([sent1, sent2])")
+st.write(gpt2_encode)
+
+# gpt2_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+gpt2_tokenizer.pad_token_id = gpt2_tokenizer.eos_token_id
+
+st.markdown("""##### Trial-3:
+> Need to add a pad token to the tokenizer, if the model doesn't have one.
+> padding = True
+> Sentences are made into a list""")
+gpt2_encode = gpt2_tokenizer([sent1, sent2], padding=True)
+st.write("gpt2_encode = gpt2_tokenizer([sent1, sent2], padding=True)")
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-4:
+> Need to add a pad token to the tokenizer, if the model doesn't have one.
+> padding = 'max_length' (requires max_length = int)
+> Sentences are made into a list""")
+gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             padding='max_length',
+                             max_length=15)
+st.write("""gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             padding='max_length',
+                             max_length=15)""")
+
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-5:
+> truncation = True (requires max_length = int)
+> Sentences are separated by a comma
+Will see a total output of 12 tokens, 6 per sentence""")
+
+gpt2_encode = gpt2_tokenizer(sent1, sent2,
+                             truncation=True,
+                             max_length=12)
+st.write("""gpt2_encode = gpt2_tokenizer(sent1, sent2,
+                             truncation=True,
+                             max_length=12)""")
+
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-6:
+> truncation = True (requires max_length = int)
+> Sentences are made into a list
+Will truncate the longest first""")
+
+gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation=True,
+                             max_length=12)
+st.write("""gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation=True,
+                             max_length=12)""")
+
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-7:
+> truncation = 'only_first'
+> Sentences are made into a list
+Will have only 8 tokens""")
+
+gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation='only_first',
+                             max_length=8)
+st.write("""gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation='only_first',
+                             max_length=8)""")
+
+st.write(gpt2_encode)
+
+st.markdown("""##### Trial-8:
+> truncation = False ('only_second' errors out here)
+> Sentences are made into a list
+No truncation, two lists of ids""")
+
+gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation=False,
+                             max_length=7)
+st.write("""gpt2_encode = gpt2_tokenizer([sent1, sent2],
+                             truncation=False,
+                             max_length=7)""")
+
+st.write(gpt2_encode)
+
+curr_dir = pathlib.Path(__file__).parent.resolve()
+file_loc = curr_dir / "task_arch.json"
+file_loc = file_loc.resolve()
+
+with open(file_loc, 'r') as arch:
+    data = json.load(arch)
+
+tasks = list(data.keys())
+st.markdown("#### Let's dive into the model architectures...")
+
+task = st.radio("The NLP tasks", tasks)
+
+task_data = data[task]
+
+num_models = len(task_data['architectures'])
+
+show_archs = st.slider("How many archs to show",
+                       min_value=4, max_value=num_models)
+
+pruned_data = {
+    "architectures": task_data['architectures'][:show_archs],
+    "AutoModelClass": task_data["AutoModelClass"],
+    "dataset": task_data["dataset"],
+    "model_used": task_data["model_used"]
+}
+
+st.write(pruned_data)
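The eight tokenizer trials above run inside Streamlit, but the padding and truncation behaviour they display can also be checked in a plain Python session. A minimal sketch, assuming transformers is installed, the gpt2 checkpoint can be downloaded, and using two arbitrary example sentences (this snippet is not part of the Space itself):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
tok.pad_token_id = tok.eos_token_id  # gpt2 has no pad token; reuse EOS, as app.py does

s1 = "This app is talking about the variety of Tokenizers and their outputs"
s2 = "Tokenizers do one thing, bring out numbers from text."

# padding=True pads every sequence in the batch to the length of the longest one
batch = tok([s1, s2], padding=True)
print([len(ids) for ids in batch["input_ids"]])  # two equal lengths

# a sentence pair with truncation=True is trimmed to max_length tokens in total
pair = tok(s1, s2, truncation=True, max_length=12)
print(len(pair["input_ids"]))  # 12

To reproduce the app itself, the usual route is to install streamlit and transformers and run: streamlit run app.py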
task_arch.json ADDED
@@ -0,0 +1 @@
{"text_classification": {"architectures": ["ALBERT", "BART", "BERT", "BigBird", "BigBird-Pegasus", "BioGpt", "BLOOM", "CamemBERT", "CANINE", "CodeLlama", "ConvBERT", "CTRL", "Data2VecText", "DeBERTa", "DeBERTa-v2", "DistilBERT", "ELECTRA", "ERNIE", "ErnieM", "ESM", "Falcon", "FlauBERT", "FNet", "Funnel Transformer", "GPT-Sw3", "OpenAI GPT-2", "GPTBigCode", "GPT Neo", "GPT NeoX", "GPT-J", "I-BERT", "LayoutLM", "LayoutLMv2", "LayoutLMv3", "LED", "LiLT", "LLaMA", "Longformer", "LUKE", "MarkupLM", "mBART", "MEGA", "Megatron-BERT", "Mistral", "Mixtral", "MobileBERT", "MPNet", "MPT", "MRA", "MT5", "MVP", "Nezha", "Nystr\u00f6mformer", "OpenLlama", "OpenAI GPT", "OPT", "Perceiver", "Persimmon", "Phi", "PLBart", "QDQBert", "Qwen2", "Reformer", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "SqueezeBERT", "T5", "TAPAS", "Transformer-XL", "UMT5", "XLM", "XLM-RoBERTa", "XLM-RoBERTa-XL", "XLNet", "X-MOD", "YOSO"], "AutoModelClass": "AutoModelForSequenceClassification", "dataset": "imdb", "model_used": "distilbert-base-uncased"}, "token_classification": {"architectures": ["ALBERT", "BERT", "BigBird", "BioGpt", "BLOOM", "BROS", "CamemBERT", "CANINE", "ConvBERT", "Data2VecText", "DeBERTa", "DeBERTa-v2", "DistilBERT", "ELECTRA", "ERNIE", "ErnieM", "ESM", "Falcon", "FlauBERT", "FNet", "Funnel Transformer", "GPT-Sw3", "OpenAI GPT-2", "GPTBigCode", "GPT Neo", "GPT NeoX", "I-BERT", "LayoutLM", "LayoutLMv2", "LayoutLMv3", "LiLT", "Longformer", "LUKE", "MarkupLM", "MEGA", "Megatron-BERT", "MobileBERT", "MPNet", "MPT", "MRA", "MT5", "Nezha", "Nystr\u00f6mformer", "Phi", "QDQBert", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "SqueezeBERT", "T5", "UMT5", "XLM", "XLM-RoBERTa", "XLM-RoBERTa-XL", "XLNet", "X-MOD", "YOSO"], "AutoModelClass": "AutoModelForTokenClassification", "dataset": "wnut_17", "model_used": "distilbert-base-uncased"}, "question_answering": {"architectures": ["ALBERT", "BART", "BERT", "BigBird", "BigBird-Pegasus", "BLOOM", "CamemBERT", "CANINE", "ConvBERT", "Data2VecText", "DeBERTa", "DeBERTa-v2", "DistilBERT", "ELECTRA", "ERNIE", "ErnieM", "Falcon", "FlauBERT", "FNet", "Funnel Transformer", "OpenAI GPT-2", "GPT Neo", "GPT NeoX", "GPT-J", "I-BERT", "LayoutLMv2", "LayoutLMv3", "LED", "LiLT", "LLaMA", "Longformer", "LUKE", "LXMERT", "MarkupLM", "mBART", "MEGA", "Megatron-BERT", "MobileBERT", "MPNet", "MPT", "MRA", "MT5", "MVP", "Nezha", "Nystr\u00f6mformer", "OPT", "QDQBert", "Reformer", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "Splinter", "SqueezeBERT", "T5", "UMT5", "XLM", "XLM-RoBERTa", "XLM-RoBERTa-XL", "XLNet", "X-MOD", "YOSO"], "AutoModelClass": "AutoModelForQuestionAnswering", "dataset": "squad", "model_used": "distilbert-base-uncased"}, "causal_lm": {"architectures": ["BART", "BERT", "Bert Generation", "BigBird", "BigBird-Pegasus", "BioGpt", "Blenderbot", "BlenderbotSmall", "BLOOM", "CamemBERT", "CodeLlama", "CodeGen", "CPM-Ant", "CTRL", "Data2VecText", "ELECTRA", "ERNIE", "Falcon", "Fuyu", "GIT", "GPT-Sw3", "OpenAI GPT-2", "GPTBigCode", "GPT Neo", "GPT NeoX", "GPT NeoX Japanese", "GPT-J", "LLaMA", "Marian", "mBART", "MEGA", "Megatron-BERT", "Mistral", "Mixtral", "MPT", "MusicGen", "MVP", "OpenLlama", "OpenAI GPT", "OPT", "Pegasus", "Persimmon", "Phi", "PLBart", "ProphetNet", "QDQBert", "Qwen2", "Reformer", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "RWKV", "Speech2Text2", "Transformer-XL", "TrOCR", "Whisper", "XGLM", "XLM", "XLM-ProphetNet", "XLM-RoBERTa", "XLM-RoBERTa-XL", "XLNet", "X-MOD"], 
"AutoModelClass": "AutoModelForCausalLM", "dataset": "eli5_category", "model_used": "distilgpt2"}, "masked_lm": {"architectures": ["ALBERT", "BART", "BERT", "BigBird", "CamemBERT", "ConvBERT", "Data2VecText", "DeBERTa", "DeBERTa-v2", "DistilBERT", "ELECTRA", "ERNIE", "ESM", "FlauBERT", "FNet", "Funnel Transformer", "I-BERT", "LayoutLM", "Longformer", "LUKE", "mBART", "MEGA", "Megatron-BERT", "MobileBERT", "MPNet", "MRA", "MVP", "Nezha", "Nystr\u00f6mformer", "Perceiver", "QDQBert", "Reformer", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "SqueezeBERT", "TAPAS", "Wav2Vec2", "XLM", "XLM-RoBERTa", "XLM-RoBERTa-XL", "X-MOD", "YOSO"], "AutoModelClass": "AutoModelForMaskedLM", "dataset": "eli-5", "model_used": "distilroberta-base"}, "translation": {"architectures": ["BART", "BigBird-Pegasus", "Blenderbot", "BlenderbotSmall", "Encoder decoder", "FairSeq Machine-Translation", "GPTSAN-japanese", "LED", "LongT5", "M2M100", "Marian", "mBART", "MT5", "MVP", "NLLB", "NLLB-MOE", "Pegasus", "PEGASUS-X", "PLBart", "ProphetNet", "SeamlessM4T", "SeamlessM4Tv2", "SwitchTransformers", "T5", "UMT5", "XLM-ProphetNet"], "AutoModelClass": "AutoModelForSeq2SeqLM", "dataset": "opus_books", "model_used": "t5-small"}, "summarization": {"architectures": ["BART", "BigBird-Pegasus", "Blenderbot", "BlenderbotSmall", "Encoder decoder", "FairSeq Machine-Translation", "GPTSAN-japanese", "LED", "LongT5", "M2M100", "Marian", "mBART", "MT5", "MVP", "NLLB", "NLLB-MOE", "Pegasus", "PEGASUS-X", "PLBart", "ProphetNet", "SeamlessM4T", "SeamlessM4Tv2", "SwitchTransformers", "T5", "UMT5", "XLM-ProphetNet"], "AutoModelClass": "AutoModelForSeq2SeqLM", "dataset": "billsum", "model_used": "t5-small"}, "multiple_choice": {"architectures": ["ALBERT", "BERT", "BigBird", "CamemBERT", "CANINE", "ConvBERT", "Data2VecText", "DeBERTa-v2", "DistilBERT", "ELECTRA", "ERNIE", "ErnieM", "FlauBERT", "FNet", "Funnel Transformer", "I-BERT", "Longformer", "LUKE", "MEGA", "Megatron-BERT", "MobileBERT", "MPNet", "MRA", "Nezha", "Nystr\u00f6mformer", "QDQBert", "RemBERT", "RoBERTa", "RoBERTa-PreLayerNorm", "RoCBert", "RoFormer", "SqueezeBERT", "XLM", "XLM-RoBERTa", "XLM-RoBERTa-XL", "XLNet", "X-MOD", "YOSO"], "AutoModelClass": "AutoModelForMultipleChoice", "dataset": "swag", "model_used": "bert-base-uncased"}}
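Each task entry in this file bundles the supported architectures with the AutoModelClass, dataset and model_used fields that app.py displays. As a rough sketch of how such an entry can be put to work (not part of the Space; it assumes transformers is installed and the listed checkpoint can be downloaded), the stored class name resolves directly from the transformers namespace:

import json
import transformers

with open("task_arch.json") as f:
    data = json.load(f)

entry = data["text_classification"]
# Look up the class named in the JSON, e.g. AutoModelForSequenceClassification
auto_cls = getattr(transformers, entry["AutoModelClass"])
model = auto_cls.from_pretrained(entry["model_used"])  # "distilbert-base-uncased"
print(type(model).__name__)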