Spaces:
Runtime error
Runtime error
| from omegaconf import OmegaConf | |
| from torch.cuda import is_available as use_cuda | |
# Description of the base model, stored as a plain dict so that it can be
# embedded as the "model" entry of each editor config in this file.
model_config = {
    "name": "google/t5-large-ssm-nq",
    "class_name": "AutoModelForSeq2SeqLM",
    "tokenizer_class": "AutoTokenizer",
    "tokenizer_name": "google/t5-large-ssm-nq",
    # DenseReluDense wi/wo weights of blocks 22 and 23 in both stacks. The
    # sub-layer index is 1 for the encoder entries and 2 for the decoder
    # entries. Expands, in order, to names such as
    #   encoder.block.22.layer.1.DenseReluDense.wi.weight
    #   ...
    #   decoder.block.23.layer.2.DenseReluDense.wo.weight
    "inner_params": [
        f"{stack}.block.{block}.layer.{sublayer}.DenseReluDense.{proj}.weight"
        for stack, sublayer in (("encoder", 1), ("decoder", 2))
        for block in (22, 23)
        for proj in ("wi", "wo")
    ],
    "pt": None,
    "small_name": "t5-small",
}
# OmegaConf config for the "ft" editor (presumably the fine-tuning baseline;
# confirm against the code that consumes this config).
ft_config = OmegaConf.create(
    {
        "device": "cpu",
        "edit_lr": 5e-6,
        "train_base": False,
        "grad_clip": 100,
        # Options namespaced under the algorithm's own key.
        "ft": {
            "verbose": False,
            "max_edit_steps": 100,
            "time_limit": None,
            "locality": {
                "enabled": False,
                "oracle": True,
                "cedit": 1e-2,
                "batch_size": 1,
            },
            "rank": None,
            "opt": "RMSprop",
            "init_std": 0.01,
        },
        # Shared base-model description defined earlier in this file.
        "model": model_config,
    }
)
# OmegaConf config for the "lu" editor (presumably a lookup-style baseline;
# confirm against the code that consumes this config).
lu_config = OmegaConf.create(
    {
        "device": "cpu",
        # Options namespaced under the algorithm's own key.
        "lu": {
            "threshold": 2.75,
            "onehot_logit": 1,
        },
        # Shared base-model description defined earlier in this file.
        "model": model_config,
    }
)
# OmegaConf config for the "ke" editor (presumably KnowledgeEditor; confirm
# against the code that consumes this config). It has no algorithm-specific
# sub-section, only top-level settings.
ke_config = OmegaConf.create(
    {
        "device": "cpu",
        "train_base": False,
        "lr": 1e-5,
        # Shared base-model description defined earlier in this file.
        "model": model_config,
    }
)
# OmegaConf config for the "enn" editor (presumably Editable Neural Networks;
# confirm against the code that consumes this config).
enn_config = OmegaConf.create(
    {
        "device": "cpu",
        "lr": 1e-5,
        "edit_lr": 1e-2,
        "lr_lr": 1e-3,
        "train_base": True,
        "grad_clip": 100,
        "dropout": 0,
        "no_grad_layers": None,
        # Options namespaced under the algorithm's own key.
        "enn": {
            "first_order": False,
            "n_edit_steps": 1,
        },
        # Shared base-model description defined earlier in this file.
        "model": model_config,
        # Numeric id equal to the trailing run id of the checkpoint path that
        # was previously recorded here:
        # "/iris/u/clin/code/efk/outputs/2022-02-09_05-48-20_8684705655/models/t5-large-ssm-nq.2022-02-09_05-48-20_8684705655"
        "archive": 8684705655,
    }
)
# OmegaConf config for the "mend" editor (presumably MEND; confirm against
# the code that consumes this config).
mend_config = OmegaConf.create(
    {
        "device": "cpu",
        "lr": 1e-6,
        "edit_lr": 1e-4,
        "lr_lr": 1e-4,
        "train_base": True,
        "grad_clip": 100,
        "dropout": 0,
        "no_grad_layers": None,
        # Options namespaced under the "gtn" key.
        "gtn": {
            "one_sided": False,
            "n_hidden": 1,
            "hidden_dim": None,
            "init": "id",
            "norm": True,
            "combine": True,
            "x_only": False,
            "delta_only": False,
            "act": "relu",
            "rank": 1920,
            "mlp_class": "IDMLP",
            "shared": True,
            "descent": False,
        },
        # Shared base-model description defined earlier in this file.
        "model": model_config,
        # Numeric id equal to the trailing run id of the checkpoint path that
        # was previously recorded here:
        # "/iris/u/clin/code/efk/outputs/2022-02-09_11-47-28_5940349945/models/t5-large-ssm-nq.2022-02-09_11-47-28_5940349945"
        "archive": 5940349945,
    }
)
# OmegaConf config for the "serac" editor (presumably SERAC; confirm against
# the code that consumes this config).
serac_config = OmegaConf.create(
    {
        # Pinned to CPU. The auto-detecting alternative that was left
        # commented out here is: "cuda" if use_cuda() else "cpu".
        "device": "cpu",
        "lr": 1e-5,
        "edit_lr": 1e-2,
        "lr_lr": 0,
        "train_base": False,
        "grad_clip": 100,
        "dropout": 0,
        "no_grad_layers": None,
        # Options namespaced under the "rep" key.
        "rep": {
            "cls_name": "distilbert-base-cased",
            "cls_class": "AutoModel",
            # NOTE(review): this is the string "true", not the boolean True;
            # confirm that the consumer expects a string here.
            "supervised": "true",
            "cos": False,
            "freeze": None,
            "square": True,
            "bound_embeds": False,
            "use_all_negatives": False,
            "freeze_cntr": False,
            "dist_heads": 1,
            "cross_attend": False,
            "lora": None,
            "soft_weighting": False,
            "checkpoint_grad": False,
            "cache_embeds": True,
        },
        # Shared base-model description defined earlier in this file.
        "model": model_config,
        # Numeric id equal to the trailing run id of the checkpoint path that
        # was previously recorded here:
        # "/iris/u/clin/code/efk/outputs/2022-02-09_14-05-56_4719776130/models/t5-large-ssm-nq.2022-02-09_14-05-56_4719776130"
        "archive": 4719776130,
    }
)