Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
·
db74ba9
1
Parent(s):
0803ab3
Scripts to generate cache
Browse files
- run.sh +112 -0
- run_data_measurements.py +8 -6
run.sh
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --label_field="label" --feature="text"
|
| 5 |
+
python3 run_data_measurements.py --dataset="hate_speech_offensive" --config="default" --split="train" --label_field="label" --feature="tweet"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
| 9 |
+
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="unsupervised" --label_field="label" --feature="text"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
python3 run_data_measurements.py --dataset="glue" --config="cola" --split="train" --label_field="label" --feature="sentence"
|
| 13 |
+
python3 run_data_measurements.py --dataset="glue" --config="cola" --split="validation" --label_field="label" --feature="sentence"
|
| 14 |
+
|
| 15 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="train" --label_field="label" --feature="hypothesis"
|
| 16 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="train" --label_field="label" --feature="premise"
|
| 17 |
+
|
| 18 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_matched" --label_field="label" --feature="premise"
|
| 19 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_matched" --label_field="label" --feature="hypothesis"
|
| 20 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_mismatched" --label_field="label" --feature="premise"
|
| 21 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_mismatched" --label_field="label" --feature="hypothesis"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="train" --label_field="label" --feature="sentence1"
|
| 25 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="train" --label_field="label" --feature="sentence2"
|
| 26 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="validation" --label_field="label" --feature="sentence1"
|
| 27 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="validation" --label_field="label" --feature="sentence2"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="train" --label_field="label" --feature="sentence1"
|
| 31 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="train" --label_field="label" --feature="sentence2"
|
| 32 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="validation" --label_field="label" --feature="sentence1"
|
| 33 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="validation" --label_field="label" --feature="sentence2"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="train" --label_field="label" --feature="sentence1"
|
| 37 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="train" --label_field="label" --feature="sentence2"
|
| 38 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="validation" --label_field="label" --feature="sentence1"
|
| 39 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="validation" --label_field="label" --feature="sentence2"
|
| 40 |
+
|
| 41 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="train" --label_field="label" --feature="sentence1"
|
| 42 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="train" --label_field="label" --feature="sentence2"
|
| 43 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="validation" --label_field="label" --feature="sentence1"
|
| 44 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="validation" --label_field="label" --feature="sentence2"
|
| 45 |
+
|
| 46 |
+
python3 run_data_measurements.py --dataset="glue" --config="sst2" --split="train" --label_field="label" --feature="sentence"
|
| 47 |
+
python3 run_data_measurements.py --dataset="glue" --config="sst2" --split="validation" --label_field="label" --feature="sentence"
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="train" --label_field="label" --feature="question"
|
| 51 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="train" --label_field="label" --feature="sentence"
|
| 52 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="validation" --label_field="label" --feature="question"
|
| 53 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="validation" --label_field="label" --feature="sentence"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="train" --label_field="label" --feature="question1"
|
| 57 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="train" --label_field="label" --feature="question2"
|
| 58 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="validation" --label_field="label" --feature="question1"
|
| 59 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="validation" --label_field="label" --feature="question2"
|
| 60 |
+
|
| 61 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_matched" --split="validation" --label_field="label" --feature="hypothesis"
|
| 62 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_matched" --split="validation" --label_field="label" --feature="premise"
|
| 63 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_mismatched" --split="validation" --label_field="label" --feature="hypothesis"
|
| 64 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_mismatched" --split="validation" --label_field="label" --feature="premise"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-v1" --split="train" --feature="text"
|
| 68 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-raw-v1" --split="train" --feature="text"
|
| 69 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-v1" --split="train" --feature="text"
|
| 70 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-raw-v1" --split="train" --feature="text"
|
| 71 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-v1" --split="validation" --feature="text"
|
| 72 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-raw-v1" --split="validation" --feature="text"
|
| 73 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-v1" --split="validation" --feature="text"
|
| 74 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-raw-v1" --split="validation" --feature="text"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Superglue wsc? wic? rte? record? multirc?
|
| 78 |
+
|
| 79 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="train" --label_field="label" --feature="question"
|
| 80 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="validation" --label_field="label" --feature="question"
|
| 81 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="train" --label_field="label" --feature="passage"
|
| 82 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="validation" --label_field="label" --feature="passage"
|
| 83 |
+
|
| 84 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="train" --label_field="label" --feature="premise"
|
| 85 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="validation" --label_field="label" --feature="premise"
|
| 86 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="train" --label_field="label" --feature="hypothesis"
|
| 87 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="validation" --label_field="label" --feature="hypothesis"
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="premise"
|
| 91 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="premise"
|
| 92 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="choice1"
|
| 93 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="choice1"
|
| 94 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="choice2"
|
| 95 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="choice2"
|
| 96 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="question"
|
| 97 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="question"
|
| 98 |
+
|
| 99 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="context"
|
| 100 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="question"
|
| 101 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="title"
|
| 102 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="context"
|
| 103 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="question"
|
| 104 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="title"
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="context"
|
| 108 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="question"
|
| 109 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="title"
|
| 110 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="context"
|
| 111 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="question"
|
| 112 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="title"
|
run_data_measurements.py
CHANGED
|
@@ -25,7 +25,11 @@ def load_or_prepare_widgets(ds_args, show_embeddings=False, use_cache=False):
|
|
| 25 |
# General stats widget
|
| 26 |
dstats.load_or_prepare_general_stats()
|
| 27 |
# Labels widget
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Text lengths widget
|
| 30 |
dstats.load_or_prepare_text_lengths()
|
| 31 |
if show_embeddings:
|
|
@@ -76,9 +80,10 @@ def load_or_prepare(dataset_args, do_html=False, use_cache=False):
|
|
| 76 |
print("Figure saved to %s." % fig_tok_length_fid)
|
| 77 |
print("Done!")
|
| 78 |
|
| 79 |
-
if
|
| 80 |
if not dstats.label_field:
|
| 81 |
-
print("Warning: You asked for label calculation, but didn't provide
|
|
|
|
| 82 |
dstats.set_label_field("label")
|
| 83 |
print("\n* Calculating label distribution.")
|
| 84 |
dstats.load_or_prepare_labels()
|
|
@@ -188,9 +193,6 @@ def main():
|
|
| 188 |
Example for hate speech18 dataset:
|
| 189 |
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --feature="text"
|
| 190 |
|
| 191 |
-
Example for Glue dataset:
|
| 192 |
-
python3 run_data_measurements.py --dataset="glue" --config="ax" --split="train" --feature="premise"
|
| 193 |
-
|
| 194 |
Example for IMDB dataset:
|
| 195 |
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
| 196 |
"""
|
|
|
|
| 25 |
# General stats widget
|
| 26 |
dstats.load_or_prepare_general_stats()
|
| 27 |
# Labels widget
|
| 28 |
+
try:
|
| 29 |
+
dstats.set_label_field("label")
|
| 30 |
+
dstats.load_or_prepare_labels()
|
| 31 |
+
except:
|
| 32 |
+
pass
|
| 33 |
# Text lengths widget
|
| 34 |
dstats.load_or_prepare_text_lengths()
|
| 35 |
if show_embeddings:
|
|
|
|
| 80 |
print("Figure saved to %s." % fig_tok_length_fid)
|
| 81 |
print("Done!")
|
| 82 |
|
| 83 |
+
if all or dataset_args["calculation"] == "labels":
|
| 84 |
if not dstats.label_field:
|
| 85 |
+
print("Warning: You asked for label calculation, but didn't provide "
|
| 86 |
+
"the labels field name. Assuming it is 'label'...")
|
| 87 |
dstats.set_label_field("label")
|
| 88 |
print("\n* Calculating label distribution.")
|
| 89 |
dstats.load_or_prepare_labels()
|
|
|
|
| 193 |
Example for hate speech18 dataset:
|
| 194 |
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --feature="text"
|
| 195 |
|
|
|
|
|
|
|
|
|
|
| 196 |
Example for IMDB dataset:
|
| 197 |
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
| 198 |
"""
|