{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9454545454545453,
  "eval_steps": 500,
  "global_step": 81,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1464,
      "step": 1
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.1231,
      "step": 2
    },
    {
      "epoch": 0.11,
      "learning_rate": 6e-06,
      "loss": 1.0741,
      "step": 3
    },
    {
      "epoch": 0.15,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.0219,
      "step": 4
    },
    {
      "epoch": 0.18,
      "learning_rate": 1e-05,
      "loss": 1.0006,
      "step": 5
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.2e-05,
      "loss": 0.8375,
      "step": 6
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.4e-05,
      "loss": 0.7492,
      "step": 7
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.6305,
      "step": 8
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.8e-05,
      "loss": 0.5905,
      "step": 9
    },
    {
      "epoch": 0.36,
      "learning_rate": 2e-05,
      "loss": 0.5033,
      "step": 10
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9990212265199738e-05,
      "loss": 0.4194,
      "step": 11
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.996086822074945e-05,
      "loss": 0.3283,
      "step": 12
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9912025308994146e-05,
      "loss": 0.2703,
      "step": 13
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.9843779142227258e-05,
      "loss": 0.2523,
      "step": 14
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.975626331552507e-05,
      "loss": 0.2421,
      "step": 15
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.96496491452281e-05,
      "loss": 0.2092,
      "step": 16
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9524145333581315e-05,
      "loss": 0.1988,
      "step": 17
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.9379997560189677e-05,
      "loss": 0.1909,
      "step": 18
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.9217488001088784e-05,
      "loss": 0.1854,
      "step": 19
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.903693477637204e-05,
      "loss": 0.1908,
      "step": 20
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.883869132745561e-05,
      "loss": 0.1873,
      "step": 21
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.862314572520028e-05,
      "loss": 0.1706,
      "step": 22
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.8390719910244487e-05,
      "loss": 0.175,
      "step": 23
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.8141868867035745e-05,
      "loss": 0.1628,
      "step": 24
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.7877079733177185e-05,
      "loss": 0.1604,
      "step": 25
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.759687084583285e-05,
      "loss": 0.1537,
      "step": 26
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.7301790727058344e-05,
      "loss": 0.1493,
      "step": 27
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6992417010043144e-05,
      "loss": 0.1396,
      "step": 28
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.666935530836651e-05,
      "loss": 0.1569,
      "step": 29
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.6333238030480473e-05,
      "loss": 0.151,
      "step": 30
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.5984723141740578e-05,
      "loss": 0.1426,
      "step": 31
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.562449287640781e-05,
      "loss": 0.1285,
      "step": 32
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.5253252402142989e-05,
      "loss": 0.1379,
      "step": 33
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.4871728439607967e-05,
      "loss": 0.1275,
      "step": 34
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.4480667839875786e-05,
      "loss": 0.1377,
      "step": 35
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.408083612243465e-05,
      "loss": 0.1316,
      "step": 36
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.367301597664757e-05,
      "loss": 0.1338,
      "step": 37
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.3258005729601178e-05,
      "loss": 0.1201,
      "step": 38
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.2836617783342968e-05,
      "loss": 0.1174,
      "step": 39
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.2409677024566145e-05,
      "loss": 0.1283,
      "step": 40
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.1978019209855174e-05,
      "loss": 0.1332,
      "step": 41
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.1542489329653024e-05,
      "loss": 0.1157,
      "step": 42
    },
    {
      "epoch": 1.56,
      "learning_rate": 1.11039399541527e-05,
      "loss": 0.1339,
      "step": 43
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.066322956435104e-05,
      "loss": 0.1149,
      "step": 44
    },
    {
      "epoch": 1.64,
      "learning_rate": 1.022122087153187e-05,
      "loss": 0.1129,
      "step": 45
    },
    {
      "epoch": 1.67,
      "learning_rate": 9.778779128468133e-06,
      "loss": 0.1161,
      "step": 46
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.336770435648963e-06,
      "loss": 0.1238,
      "step": 47
    },
    {
      "epoch": 1.75,
      "learning_rate": 8.896060045847305e-06,
      "loss": 0.1132,
      "step": 48
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.457510670346976e-06,
      "loss": 0.1172,
      "step": 49
    },
    {
      "epoch": 1.82,
      "learning_rate": 8.021980790144828e-06,
      "loss": 0.1228,
      "step": 50
    },
    {
      "epoch": 1.85,
      "learning_rate": 7.590322975433857e-06,
      "loss": 0.1177,
      "step": 51
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.163382216657033e-06,
      "loss": 0.1318,
      "step": 52
    },
    {
      "epoch": 1.93,
      "learning_rate": 6.741994270398826e-06,
      "loss": 0.111,
      "step": 53
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.326984023352435e-06,
      "loss": 0.113,
      "step": 54
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.919163877565351e-06,
      "loss": 0.1157,
      "step": 55
    },
    {
      "epoch": 2.04,
      "learning_rate": 5.519332160124215e-06,
      "loss": 0.1044,
      "step": 56
    },
    {
      "epoch": 2.07,
      "learning_rate": 5.128271560392037e-06,
      "loss": 0.1079,
      "step": 57
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.746747597857014e-06,
      "loss": 0.1172,
      "step": 58
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.375507123592194e-06,
      "loss": 0.1111,
      "step": 59
    },
    {
      "epoch": 2.18,
      "learning_rate": 4.015276858259427e-06,
      "loss": 0.1158,
      "step": 60
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.6667619695195287e-06,
      "loss": 0.114,
      "step": 61
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.330644691633492e-06,
      "loss": 0.1063,
      "step": 62
    },
    {
      "epoch": 2.29,
      "learning_rate": 3.00758298995686e-06,
      "loss": 0.1101,
      "step": 63
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.698209272941659e-06,
      "loss": 0.1105,
      "step": 64
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.403129154167153e-06,
      "loss": 0.1094,
      "step": 65
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.1229202668228197e-06,
      "loss": 0.1153,
      "step": 66
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.8581311329642592e-06,
      "loss": 0.1258,
      "step": 67
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.609280089755515e-06,
      "loss": 0.1177,
      "step": 68
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3768542747997215e-06,
      "loss": 0.1179,
      "step": 69
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.161308672544389e-06,
      "loss": 0.1188,
      "step": 70
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.630652236279626e-07,
      "loss": 0.108,
      "step": 71
    },
    {
      "epoch": 2.62,
      "learning_rate": 7.825119989112173e-07,
      "loss": 0.1122,
      "step": 72
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.200024398103255e-07,
      "loss": 0.1125,
      "step": 73
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.7585466641868696e-07,
      "loss": 0.1204,
      "step": 74
    },
    {
      "epoch": 2.73,
      "learning_rate": 3.5035085477190143e-07,
      "loss": 0.1185,
      "step": 75
    },
    {
      "epoch": 2.76,
      "learning_rate": 2.4373668447493225e-07,
      "loss": 0.1059,
      "step": 76
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.562208577727442e-07,
      "loss": 0.1061,
      "step": 77
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.797469100585432e-08,
      "loss": 0.1147,
      "step": 78
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.913177925055189e-08,
      "loss": 0.1114,
      "step": 79
    },
    {
      "epoch": 2.91,
      "learning_rate": 9.78773480026396e-09,
      "loss": 0.1119,
      "step": 80
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0,
      "loss": 0.1178,
      "step": 81
    },
    {
      "epoch": 2.95,
      "step": 81,
      "total_flos": 5.979626147309158e+16,
      "train_loss": 0.23224136665647413,
      "train_runtime": 1011.6456,
      "train_samples_per_second": 20.758,
      "train_steps_per_second": 0.08
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 81,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 5.979626147309158e+16,
  "trial_name": null,
  "trial_params": null
}