JingyangOu
/

radd-lambda-dce

Model card Files Files and versions

jingyang Ou committed on Jul 17, 2024

Commit

f0c5b40

·

1 Parent(s): acf4386

update model

Files changed (3) hide show

README.md +5 -0
config.json +60 -0
model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,5 @@

+Reparameterized Absorbing Discrete Diffusion (RADD) small model, trained with the lambda-DCE loss for 400k iterations.
+Code: https://github.com/ML-GSAI/RADD
+Paper: https://arxiv.org/abs/2406.03736

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+    "ngpus": 32,
+    "tokens": 50257,
+    "gpt_dir": "assets/gpt2-large",
+    "outdir": "../output",
+    "training": {
+        "batch_size": 512,
+        "accum": 1,
+        "n_iters": 1000001,
+        "snapshot_freq": 50000,
+        "log_freq": 50,
+        "eval_freq": 100,
+        "snapshot_freq_for_preemption": 10000,
+        "weight": "standard",
+        "snapshot_sampling": false,
+        "ema": 0.9999,
+        "loss_type": "t_DCE"
+    },
+    "data": {
+        "train": "openwebtext",
+        "valid": "wikitext103",
+        "cache_dir": "data"
+    },
+    "noise": {
+        "type": "loglinear",
+        "sigma_min": 0.0001,
+        "sigma_max": 20
+    },
+    "sampling": {
+        "predictor": "euler",
+        "steps": 1024
+    },
+    "eval": {
+        "batch_size": 512,
+        "perplexity": true,
+        "perplexity_batch_size": 16
+    },
+    "optim": {
+        "weight_decay": 0.03,
+        "optimizer": "AdamW",
+        "lr": 0.0003,
+        "beta1": 0.9,
+        "beta2": 0.999,
+        "eps": 1e-08,
+        "warmup": 2500,
+        "grad_clip": 1.0
+    },
+    "model": {
+        "name": "small_wotsm",
+        "type": "ddit_wot",
+        "hidden_size": 768,
+        "cond_dim": 128,
+        "length": 1024,
+        "n_blocks": 12,
+        "n_heads": 12,
+        "dropout": 0.02,
+        "use_checkpoint": false,
+        "dtype": "float16"
+    }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad49d643dabc3aca8877d0df7928d3f81691491705d36f29c3e510cce8f4b91
+size 649074528