{
  "batch_size": 18,
  "num_epochs": 4,
  "lr": 5e-5,
  "seq_len": 256,
  "d_model": 256,
  "N": 4,
  "h": 4,
  "train": "dataset/openweb_fine.jsonl",
  "test": "dataset/openweb_fine.jsonl",
  "d_ff": 1024,
  "dropout": 0.1,
  "model_folder": "./",
  "model_basename": "",
  "preload": "weights",
  "tokenizer_file": "openweb2.tokenizer.json",
  "experiment_name": "runs/openweb2",
  "dataset": "dataset/dataset_general.jsonl",
  "loss_file": "openweb2/losses.jsonl",
  "fine_dataset": "dataset/fine_tune.jsonl",
  "fine_epochs": 0
}