| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 6666, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0144014401440144, | |
| "grad_norm": 10.98869514465332, | |
| "learning_rate": 4.347826086956522e-07, | |
| "loss": 0.6974, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0288028802880288, | |
| "grad_norm": 10.853715896606445, | |
| "learning_rate": 9.145427286356823e-07, | |
| "loss": 0.6333, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.043204320432043204, | |
| "grad_norm": 13.330265045166016, | |
| "learning_rate": 1.3943028485757123e-06, | |
| "loss": 0.5058, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0576057605760576, | |
| "grad_norm": 11.676138877868652, | |
| "learning_rate": 1.8740629685157422e-06, | |
| "loss": 0.5039, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.07200720072007201, | |
| "grad_norm": 11.024947166442871, | |
| "learning_rate": 2.3388305847076464e-06, | |
| "loss": 0.5272, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08640864086408641, | |
| "grad_norm": 11.500349998474121, | |
| "learning_rate": 2.8185907046476763e-06, | |
| "loss": 0.5414, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.10081008100810081, | |
| "grad_norm": 14.144675254821777, | |
| "learning_rate": 3.2983508245877066e-06, | |
| "loss": 0.5167, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.1152115211521152, | |
| "grad_norm": 15.339383125305176, | |
| "learning_rate": 3.763118440779611e-06, | |
| "loss": 0.4782, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.12961296129612962, | |
| "grad_norm": 14.547229766845703, | |
| "learning_rate": 4.242878560719641e-06, | |
| "loss": 0.5071, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.14401440144014402, | |
| "grad_norm": 16.514537811279297, | |
| "learning_rate": 4.722638680659671e-06, | |
| "loss": 0.5238, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15841584158415842, | |
| "grad_norm": 13.728464126586914, | |
| "learning_rate": 5.2023988005997004e-06, | |
| "loss": 0.6176, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.17281728172817282, | |
| "grad_norm": 9.859703063964844, | |
| "learning_rate": 5.682158920539731e-06, | |
| "loss": 0.5022, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.18721872187218722, | |
| "grad_norm": 16.529651641845703, | |
| "learning_rate": 6.16191904047976e-06, | |
| "loss": 0.5606, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.20162016201620162, | |
| "grad_norm": 12.64522647857666, | |
| "learning_rate": 6.6416791604197905e-06, | |
| "loss": 0.5115, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.21602160216021601, | |
| "grad_norm": 9.362441062927246, | |
| "learning_rate": 7.121439280359821e-06, | |
| "loss": 0.5371, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2304230423042304, | |
| "grad_norm": 12.86221694946289, | |
| "learning_rate": 7.60119940029985e-06, | |
| "loss": 0.5343, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.2448244824482448, | |
| "grad_norm": 12.512419700622559, | |
| "learning_rate": 8.065967016491755e-06, | |
| "loss": 0.6191, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.25922592259225924, | |
| "grad_norm": 14.35505199432373, | |
| "learning_rate": 8.53073463268366e-06, | |
| "loss": 0.5668, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.27362736273627364, | |
| "grad_norm": 14.951635360717773, | |
| "learning_rate": 9.010494752623688e-06, | |
| "loss": 0.6024, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.28802880288028804, | |
| "grad_norm": 11.853578567504883, | |
| "learning_rate": 9.490254872563718e-06, | |
| "loss": 0.6092, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.30243024302430244, | |
| "grad_norm": 9.165077209472656, | |
| "learning_rate": 9.970014992503749e-06, | |
| "loss": 0.5587, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.31683168316831684, | |
| "grad_norm": 12.137311935424805, | |
| "learning_rate": 9.999382956748588e-06, | |
| "loss": 0.5444, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.33123312331233123, | |
| "grad_norm": 15.218328475952148, | |
| "learning_rate": 9.997364717027728e-06, | |
| "loss": 0.5909, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.34563456345634563, | |
| "grad_norm": 13.624066352844238, | |
| "learning_rate": 9.993943105133823e-06, | |
| "loss": 0.6135, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.36003600360036003, | |
| "grad_norm": 12.48714542388916, | |
| "learning_rate": 9.989119081932283e-06, | |
| "loss": 0.5868, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.37443744374437443, | |
| "grad_norm": 20.65450668334961, | |
| "learning_rate": 9.9828940021171e-06, | |
| "loss": 0.6172, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.38883888388838883, | |
| "grad_norm": 15.066054344177246, | |
| "learning_rate": 9.975269613830395e-06, | |
| "loss": 0.5682, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.40324032403240323, | |
| "grad_norm": 10.448845863342285, | |
| "learning_rate": 9.966248058171527e-06, | |
| "loss": 0.6659, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.41764176417641763, | |
| "grad_norm": 6.550926208496094, | |
| "learning_rate": 9.955831868595796e-06, | |
| "loss": 0.6053, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.43204320432043203, | |
| "grad_norm": 18.164133071899414, | |
| "learning_rate": 9.94402397020302e-06, | |
| "loss": 0.5109, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4464446444644464, | |
| "grad_norm": 26.886669158935547, | |
| "learning_rate": 9.930827678916084e-06, | |
| "loss": 0.6006, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.4608460846084608, | |
| "grad_norm": 9.457767486572266, | |
| "learning_rate": 9.916246700549754e-06, | |
| "loss": 0.5794, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.4752475247524752, | |
| "grad_norm": 15.5580472946167, | |
| "learning_rate": 9.900285129770016e-06, | |
| "loss": 0.5606, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.4896489648964896, | |
| "grad_norm": 9.84897232055664, | |
| "learning_rate": 9.882947448944177e-06, | |
| "loss": 0.6579, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.504050405040504, | |
| "grad_norm": 10.959239959716797, | |
| "learning_rate": 9.864238526882147e-06, | |
| "loss": 0.5455, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5184518451845185, | |
| "grad_norm": 15.946135520935059, | |
| "learning_rate": 9.844163617469138e-06, | |
| "loss": 0.6002, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.5328532853285328, | |
| "grad_norm": 12.9840726852417, | |
| "learning_rate": 9.822728358190274e-06, | |
| "loss": 0.6541, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.5472547254725473, | |
| "grad_norm": 11.688780784606934, | |
| "learning_rate": 9.799938768547452e-06, | |
| "loss": 0.6294, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.5616561656165616, | |
| "grad_norm": 13.91913890838623, | |
| "learning_rate": 9.77580124836893e-06, | |
| "loss": 0.5856, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.5760576057605761, | |
| "grad_norm": 13.728696823120117, | |
| "learning_rate": 9.750322576012119e-06, | |
| "loss": 0.622, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5904590459045904, | |
| "grad_norm": 8.887660026550293, | |
| "learning_rate": 9.723509906460054e-06, | |
| "loss": 0.6663, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.6048604860486049, | |
| "grad_norm": 12.27409839630127, | |
| "learning_rate": 9.69537076931213e-06, | |
| "loss": 0.5707, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.6192619261926192, | |
| "grad_norm": 23.663471221923828, | |
| "learning_rate": 9.665913066669608e-06, | |
| "loss": 0.6018, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.6336633663366337, | |
| "grad_norm": 14.472834587097168, | |
| "learning_rate": 9.635145070916541e-06, | |
| "loss": 0.5736, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.648064806480648, | |
| "grad_norm": 13.856266975402832, | |
| "learning_rate": 9.603075422396685e-06, | |
| "loss": 0.6117, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.6624662466246625, | |
| "grad_norm": 11.307412147521973, | |
| "learning_rate": 9.569713126987122e-06, | |
| "loss": 0.6123, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.6768676867686768, | |
| "grad_norm": 12.995174407958984, | |
| "learning_rate": 9.535067553569175e-06, | |
| "loss": 0.5784, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.6912691269126913, | |
| "grad_norm": 21.58072853088379, | |
| "learning_rate": 9.499148431397448e-06, | |
| "loss": 0.6081, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.7056705670567057, | |
| "grad_norm": 16.922595977783203, | |
| "learning_rate": 9.461965847367611e-06, | |
| "loss": 0.6303, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.7200720072007201, | |
| "grad_norm": 17.760387420654297, | |
| "learning_rate": 9.423530243183783e-06, | |
| "loss": 0.5761, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7344734473447345, | |
| "grad_norm": 11.714215278625488, | |
| "learning_rate": 9.385111038077417e-06, | |
| "loss": 0.5852, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.7488748874887489, | |
| "grad_norm": 12.36257266998291, | |
| "learning_rate": 9.344240422449167e-06, | |
| "loss": 0.5965, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.7632763276327633, | |
| "grad_norm": 14.300825119018555, | |
| "learning_rate": 9.302149846608464e-06, | |
| "loss": 0.5316, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.7776777677767777, | |
| "grad_norm": 16.075668334960938, | |
| "learning_rate": 9.25885113053368e-06, | |
| "loss": 0.6228, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 11.793745040893555, | |
| "learning_rate": 9.214356433476091e-06, | |
| "loss": 0.5644, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8064806480648065, | |
| "grad_norm": 12.965546607971191, | |
| "learning_rate": 9.168678250545255e-06, | |
| "loss": 0.5646, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.8208820882088209, | |
| "grad_norm": 9.327117919921875, | |
| "learning_rate": 9.121829409200145e-06, | |
| "loss": 0.5961, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.8352835283528353, | |
| "grad_norm": 9.750507354736328, | |
| "learning_rate": 9.073823065646882e-06, | |
| "loss": 0.6158, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.8496849684968497, | |
| "grad_norm": 17.131214141845703, | |
| "learning_rate": 9.024672701144184e-06, | |
| "loss": 0.5422, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.8640864086408641, | |
| "grad_norm": 9.129823684692383, | |
| "learning_rate": 8.97439211821753e-06, | |
| "loss": 0.5846, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8784878487848785, | |
| "grad_norm": 9.996623039245605, | |
| "learning_rate": 8.922995436783104e-06, | |
| "loss": 0.5824, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.8928892889288929, | |
| "grad_norm": 12.214370727539062, | |
| "learning_rate": 8.870497090182593e-06, | |
| "loss": 0.5921, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.9072907290729073, | |
| "grad_norm": 12.267783164978027, | |
| "learning_rate": 8.816911821129992e-06, | |
| "loss": 0.5496, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.9216921692169217, | |
| "grad_norm": 11.49367618560791, | |
| "learning_rate": 8.762254677571517e-06, | |
| "loss": 0.6221, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.9360936093609361, | |
| "grad_norm": 11.387689590454102, | |
| "learning_rate": 8.706541008459798e-06, | |
| "loss": 0.5685, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.9504950495049505, | |
| "grad_norm": 12.342247009277344, | |
| "learning_rate": 8.649786459443555e-06, | |
| "loss": 0.5922, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.9648964896489649, | |
| "grad_norm": 17.48115348815918, | |
| "learning_rate": 8.59200696847395e-06, | |
| "loss": 0.6309, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.9792979297929792, | |
| "grad_norm": 13.321036338806152, | |
| "learning_rate": 8.533218761328843e-06, | |
| "loss": 0.5513, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.9936993699369937, | |
| "grad_norm": 13.628349304199219, | |
| "learning_rate": 8.473438347056239e-06, | |
| "loss": 0.5934, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.008100810081008, | |
| "grad_norm": 6.996334075927734, | |
| "learning_rate": 8.412682513338176e-06, | |
| "loss": 0.4909, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.0225022502250225, | |
| "grad_norm": 10.135323524475098, | |
| "learning_rate": 8.35096832177636e-06, | |
| "loss": 0.3238, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.036903690369037, | |
| "grad_norm": 7.344214916229248, | |
| "learning_rate": 8.28831310310089e-06, | |
| "loss": 0.297, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.0513051305130514, | |
| "grad_norm": 8.131421089172363, | |
| "learning_rate": 8.224734452303397e-06, | |
| "loss": 0.3335, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.0657065706570656, | |
| "grad_norm": 8.626580238342285, | |
| "learning_rate": 8.160250223695987e-06, | |
| "loss": 0.3357, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.08010801080108, | |
| "grad_norm": 20.845579147338867, | |
| "learning_rate": 8.094878525897325e-06, | |
| "loss": 0.3235, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.0945094509450946, | |
| "grad_norm": 13.4508695602417, | |
| "learning_rate": 8.028637716747355e-06, | |
| "loss": 0.3331, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 1.108910891089109, | |
| "grad_norm": 11.288851737976074, | |
| "learning_rate": 7.961546398151988e-06, | |
| "loss": 0.3425, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 1.1233123312331232, | |
| "grad_norm": 12.70616626739502, | |
| "learning_rate": 7.893623410859282e-06, | |
| "loss": 0.343, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 1.1377137713771377, | |
| "grad_norm": 14.446159362792969, | |
| "learning_rate": 7.824887829168522e-06, | |
| "loss": 0.3426, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 1.1521152115211521, | |
| "grad_norm": 9.85571575164795, | |
| "learning_rate": 7.755358955573747e-06, | |
| "loss": 0.3229, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.1665166516651666, | |
| "grad_norm": 16.067195892333984, | |
| "learning_rate": 7.685056315343165e-06, | |
| "loss": 0.3246, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 1.1809180918091808, | |
| "grad_norm": 20.33487319946289, | |
| "learning_rate": 7.613999651036016e-06, | |
| "loss": 0.374, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 1.1953195319531953, | |
| "grad_norm": 12.800278663635254, | |
| "learning_rate": 7.542208916958433e-06, | |
| "loss": 0.3008, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 1.2097209720972097, | |
| "grad_norm": 10.589587211608887, | |
| "learning_rate": 7.469704273559807e-06, | |
| "loss": 0.3738, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 1.2241224122412242, | |
| "grad_norm": 9.341863632202148, | |
| "learning_rate": 7.396506081771295e-06, | |
| "loss": 0.3035, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.2385238523852384, | |
| "grad_norm": 8.244071960449219, | |
| "learning_rate": 7.322634897288008e-06, | |
| "loss": 0.3748, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 1.2529252925292529, | |
| "grad_norm": 6.00961971282959, | |
| "learning_rate": 7.248111464796508e-06, | |
| "loss": 0.2727, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 1.2673267326732673, | |
| "grad_norm": 9.179043769836426, | |
| "learning_rate": 7.172956712149234e-06, | |
| "loss": 0.3004, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 1.2817281728172818, | |
| "grad_norm": 9.150934219360352, | |
| "learning_rate": 7.0971917444875015e-06, | |
| "loss": 0.3635, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 1.296129612961296, | |
| "grad_norm": 9.036107063293457, | |
| "learning_rate": 7.020837838314691e-06, | |
| "loss": 0.2943, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.3105310531053105, | |
| "grad_norm": 9.950891494750977, | |
| "learning_rate": 6.94391643552134e-06, | |
| "loss": 0.3727, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 1.324932493249325, | |
| "grad_norm": 15.246482849121094, | |
| "learning_rate": 6.866449137363768e-06, | |
| "loss": 0.2992, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 1.3393339333933394, | |
| "grad_norm": 10.054420471191406, | |
| "learning_rate": 6.788457698397973e-06, | |
| "loss": 0.3322, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 1.3537353735373538, | |
| "grad_norm": 10.79068374633789, | |
| "learning_rate": 6.709964020370445e-06, | |
| "loss": 0.3245, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 1.368136813681368, | |
| "grad_norm": 10.584324836730957, | |
| "learning_rate": 6.630990146067687e-06, | |
| "loss": 0.3418, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.3825382538253825, | |
| "grad_norm": 16.110841751098633, | |
| "learning_rate": 6.554047211421132e-06, | |
| "loss": 0.3437, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 1.396939693969397, | |
| "grad_norm": 11.55324649810791, | |
| "learning_rate": 6.4741928832357855e-06, | |
| "loss": 0.3011, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 1.4113411341134112, | |
| "grad_norm": 15.144320487976074, | |
| "learning_rate": 6.393924568602145e-06, | |
| "loss": 0.337, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 1.4257425742574257, | |
| "grad_norm": 10.759925842285156, | |
| "learning_rate": 6.313264808664494e-06, | |
| "loss": 0.3149, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 1.4401440144014401, | |
| "grad_norm": 14.752134323120117, | |
| "learning_rate": 6.232236254493746e-06, | |
| "loss": 0.3622, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 11.484366416931152, | |
| "learning_rate": 6.150861660726515e-06, | |
| "loss": 0.3112, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 1.468946894689469, | |
| "grad_norm": 13.652670860290527, | |
| "learning_rate": 6.069163879175092e-06, | |
| "loss": 0.3403, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 1.4833483348334833, | |
| "grad_norm": 9.8715181350708, | |
| "learning_rate": 5.9871658524101565e-06, | |
| "loss": 0.3205, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 1.4977497749774977, | |
| "grad_norm": 11.28494644165039, | |
| "learning_rate": 5.9048906073179824e-06, | |
| "loss": 0.304, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 1.5121512151215122, | |
| "grad_norm": 13.887341499328613, | |
| "learning_rate": 5.822361248633973e-06, | |
| "loss": 0.3027, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.5265526552655264, | |
| "grad_norm": 7.218605041503906, | |
| "learning_rate": 5.7396009524543274e-06, | |
| "loss": 0.3081, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 1.5409540954095409, | |
| "grad_norm": 7.5199503898620605, | |
| "learning_rate": 5.656632959727683e-06, | |
| "loss": 0.2977, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 1.5553555355535553, | |
| "grad_norm": 9.384359359741211, | |
| "learning_rate": 5.57348056972852e-06, | |
| "loss": 0.3032, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 1.5697569756975698, | |
| "grad_norm": 14.385334014892578, | |
| "learning_rate": 5.492772883806706e-06, | |
| "loss": 0.3244, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 1.5841584158415842, | |
| "grad_norm": 3.620002269744873, | |
| "learning_rate": 5.40932574467665e-06, | |
| "loss": 0.2992, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.5985598559855987, | |
| "grad_norm": 9.094705581665039, | |
| "learning_rate": 5.325763657690609e-06, | |
| "loss": 0.2942, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 1.612961296129613, | |
| "grad_norm": 10.146318435668945, | |
| "learning_rate": 5.242110088958073e-06, | |
| "loss": 0.3395, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 1.6273627362736274, | |
| "grad_norm": 13.460920333862305, | |
| "learning_rate": 5.158388530278656e-06, | |
| "loss": 0.3097, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 1.6417641764176416, | |
| "grad_norm": 11.407035827636719, | |
| "learning_rate": 5.074622492545074e-06, | |
| "loss": 0.3159, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 1.656165616561656, | |
| "grad_norm": 10.230562210083008, | |
| "learning_rate": 4.9908354991407666e-06, | |
| "loss": 0.327, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.6705670567056705, | |
| "grad_norm": 11.073262214660645, | |
| "learning_rate": 4.9070510793339835e-06, | |
| "loss": 0.2981, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 1.684968496849685, | |
| "grad_norm": 12.47003173828125, | |
| "learning_rate": 4.823292761670264e-06, | |
| "loss": 0.3065, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 1.6993699369936994, | |
| "grad_norm": 25.917009353637695, | |
| "learning_rate": 4.74219897937266e-06, | |
| "loss": 0.2992, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 1.7137713771377139, | |
| "grad_norm": 11.52535629272461, | |
| "learning_rate": 4.658560774737667e-06, | |
| "loss": 0.3474, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 1.7281728172817283, | |
| "grad_norm": 9.606013298034668, | |
| "learning_rate": 4.5750184539003665e-06, | |
| "loss": 0.3436, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.7425742574257426, | |
| "grad_norm": 15.367290496826172, | |
| "learning_rate": 4.4915954774194676e-06, | |
| "loss": 0.2932, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 1.756975697569757, | |
| "grad_norm": 7.993281364440918, | |
| "learning_rate": 4.408315272339104e-06, | |
| "loss": 0.3203, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 1.7713771377137713, | |
| "grad_norm": 8.027710914611816, | |
| "learning_rate": 4.325201225609999e-06, | |
| "loss": 0.3139, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 1.7857785778577857, | |
| "grad_norm": 10.957657814025879, | |
| "learning_rate": 4.242276677521877e-06, | |
| "loss": 0.3453, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 1.8001800180018002, | |
| "grad_norm": 10.544370651245117, | |
| "learning_rate": 4.159564915148997e-06, | |
| "loss": 0.2853, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.8145814581458146, | |
| "grad_norm": 12.427223205566406, | |
| "learning_rate": 4.077089165810611e-06, | |
| "loss": 0.3355, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 1.828982898289829, | |
| "grad_norm": 18.81423568725586, | |
| "learning_rate": 3.994872590548211e-06, | |
| "loss": 0.328, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 1.8433843384338435, | |
| "grad_norm": 9.081976890563965, | |
| "learning_rate": 3.9129382776213945e-06, | |
| "loss": 0.293, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 1.8577857785778578, | |
| "grad_norm": 8.164251327514648, | |
| "learning_rate": 3.831309236024159e-06, | |
| "loss": 0.2782, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 1.8721872187218722, | |
| "grad_norm": 9.661165237426758, | |
| "learning_rate": 3.7500083890234606e-06, | |
| "loss": 0.3296, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.8865886588658864, | |
| "grad_norm": 14.657001495361328, | |
| "learning_rate": 3.66905856772185e-06, | |
| "loss": 0.2631, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 1.900990099009901, | |
| "grad_norm": 16.178007125854492, | |
| "learning_rate": 3.5884825046459805e-06, | |
| "loss": 0.2765, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 1.9153915391539154, | |
| "grad_norm": 7.737805366516113, | |
| "learning_rate": 3.508302827362805e-06, | |
| "loss": 0.317, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 1.9297929792979298, | |
| "grad_norm": 9.958755493164062, | |
| "learning_rate": 3.4285420521252533e-06, | |
| "loss": 0.2975, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 1.9441944194419443, | |
| "grad_norm": 13.483292579650879, | |
| "learning_rate": 3.3492225775491582e-06, | |
| "loss": 0.2776, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.9585958595859587, | |
| "grad_norm": 11.092999458312988, | |
| "learning_rate": 3.270366678323219e-06, | |
| "loss": 0.3453, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 1.972997299729973, | |
| "grad_norm": 11.395092964172363, | |
| "learning_rate": 3.1919964989537755e-06, | |
| "loss": 0.2678, | |
| "step": 4384 | |
| }, | |
| { | |
| "epoch": 1.9873987398739874, | |
| "grad_norm": 10.83203411102295, | |
| "learning_rate": 3.1141340475461316e-06, | |
| "loss": 0.3074, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 2.0018001800180016, | |
| "grad_norm": 6.892433166503906, | |
| "learning_rate": 3.03680118962418e-06, | |
| "loss": 0.3064, | |
| "step": 4448 | |
| }, | |
| { | |
| "epoch": 2.016201620162016, | |
| "grad_norm": 6.377696514129639, | |
| "learning_rate": 2.9600196419900795e-06, | |
| "loss": 0.1179, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.0306030603060305, | |
| "grad_norm": 8.865036964416504, | |
| "learning_rate": 2.883810966625684e-06, | |
| "loss": 0.1444, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 2.045004500450045, | |
| "grad_norm": 7.50280237197876, | |
| "learning_rate": 2.8081965646374582e-06, | |
| "loss": 0.1138, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 2.0594059405940595, | |
| "grad_norm": 9.762062072753906, | |
| "learning_rate": 2.7331976702465647e-06, | |
| "loss": 0.1251, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 2.073807380738074, | |
| "grad_norm": 10.026853561401367, | |
| "learning_rate": 2.658835344825821e-06, | |
| "loss": 0.1239, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 2.0882088208820884, | |
| "grad_norm": 10.256850242614746, | |
| "learning_rate": 2.5851304709851855e-06, | |
| "loss": 0.1131, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.102610261026103, | |
| "grad_norm": 11.005268096923828, | |
| "learning_rate": 2.5121037467074596e-06, | |
| "loss": 0.1224, | |
| "step": 4672 | |
| }, | |
| { | |
| "epoch": 2.117011701170117, | |
| "grad_norm": 11.805505752563477, | |
| "learning_rate": 2.4397756795358287e-06, | |
| "loss": 0.1101, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 2.1314131413141313, | |
| "grad_norm": 7.897261142730713, | |
| "learning_rate": 2.3703932820444233e-06, | |
| "loss": 0.1238, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 2.1458145814581457, | |
| "grad_norm": 10.117232322692871, | |
| "learning_rate": 2.2994998626884623e-06, | |
| "loss": 0.1072, | |
| "step": 4768 | |
| }, | |
| { | |
| "epoch": 2.16021602160216, | |
| "grad_norm": 9.720394134521484, | |
| "learning_rate": 2.2293648043808946e-06, | |
| "loss": 0.1377, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.1746174617461747, | |
| "grad_norm": 9.53496265411377, | |
| "learning_rate": 2.1600078026201977e-06, | |
| "loss": 0.114, | |
| "step": 4832 | |
| }, | |
| { | |
| "epoch": 2.189018901890189, | |
| "grad_norm": 9.775762557983398, | |
| "learning_rate": 2.091448334409112e-06, | |
| "loss": 0.1053, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 2.2034203420342036, | |
| "grad_norm": 11.364226341247559, | |
| "learning_rate": 2.0237056527850555e-06, | |
| "loss": 0.0966, | |
| "step": 4896 | |
| }, | |
| { | |
| "epoch": 2.217821782178218, | |
| "grad_norm": 17.391111373901367, | |
| "learning_rate": 1.95679878141344e-06, | |
| "loss": 0.1016, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 2.232223222322232, | |
| "grad_norm": 10.15132999420166, | |
| "learning_rate": 1.8907465092453986e-06, | |
| "loss": 0.1184, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.2466246624662465, | |
| "grad_norm": 12.572965621948242, | |
| "learning_rate": 1.8255673852414274e-06, | |
| "loss": 0.1015, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 2.261026102610261, | |
| "grad_norm": 5.2437968254089355, | |
| "learning_rate": 1.7612797131624243e-06, | |
| "loss": 0.0993, | |
| "step": 5024 | |
| }, | |
| { | |
| "epoch": 2.2754275427542754, | |
| "grad_norm": 8.918633460998535, | |
| "learning_rate": 1.6979015464295785e-06, | |
| "loss": 0.1079, | |
| "step": 5056 | |
| }, | |
| { | |
| "epoch": 2.28982898289829, | |
| "grad_norm": 9.719175338745117, | |
| "learning_rate": 1.6354506830545625e-06, | |
| "loss": 0.1016, | |
| "step": 5088 | |
| }, | |
| { | |
| "epoch": 2.3042304230423043, | |
| "grad_norm": 24.36786460876465, | |
| "learning_rate": 1.5739446606414522e-06, | |
| "loss": 0.1342, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.3186318631863188, | |
| "grad_norm": 7.500924587249756, | |
| "learning_rate": 1.5134007514617827e-06, | |
| "loss": 0.0955, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 2.333033303330333, | |
| "grad_norm": 10.143550872802734, | |
| "learning_rate": 1.4538359576040923e-06, | |
| "loss": 0.1451, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 2.3474347434743477, | |
| "grad_norm": 15.87746524810791, | |
| "learning_rate": 1.395267006199363e-06, | |
| "loss": 0.1329, | |
| "step": 5216 | |
| }, | |
| { | |
| "epoch": 2.3618361836183617, | |
| "grad_norm": 12.571290016174316, | |
| "learning_rate": 1.33771034472367e-06, | |
| "loss": 0.0978, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 2.376237623762376, | |
| "grad_norm": 11.44887924194336, | |
| "learning_rate": 1.2811821363793497e-06, | |
| "loss": 0.1037, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.3906390639063906, | |
| "grad_norm": 11.863057136535645, | |
| "learning_rate": 1.2256982555560243e-06, | |
| "loss": 0.116, | |
| "step": 5312 | |
| }, | |
| { | |
| "epoch": 2.405040504050405, | |
| "grad_norm": 5.990699768066406, | |
| "learning_rate": 1.171274283372703e-06, | |
| "loss": 0.1037, | |
| "step": 5344 | |
| }, | |
| { | |
| "epoch": 2.4194419441944195, | |
| "grad_norm": 10.163458824157715, | |
| "learning_rate": 1.1179255033022624e-06, | |
| "loss": 0.1387, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 2.433843384338434, | |
| "grad_norm": 11.073272705078125, | |
| "learning_rate": 1.0672833301104142e-06, | |
| "loss": 0.12, | |
| "step": 5408 | |
| }, | |
| { | |
| "epoch": 2.4482448244824484, | |
| "grad_norm": 10.601714134216309, | |
| "learning_rate": 1.016094827126849e-06, | |
| "loss": 0.0957, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.4626462646264624, | |
| "grad_norm": 7.653828144073486, | |
| "learning_rate": 9.660250941303178e-07, | |
| "loss": 0.113, | |
| "step": 5472 | |
| }, | |
| { | |
| "epoch": 2.477047704770477, | |
| "grad_norm": 10.332706451416016, | |
| "learning_rate": 9.170881918256042e-07, | |
| "loss": 0.1001, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 2.4914491449144913, | |
| "grad_norm": 7.20733642578125, | |
| "learning_rate": 8.692978627932148e-07, | |
| "loss": 0.0998, | |
| "step": 5536 | |
| }, | |
| { | |
| "epoch": 2.5058505850585058, | |
| "grad_norm": 4.12467622756958, | |
| "learning_rate": 8.226675276301416e-07, | |
| "loss": 0.0855, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 2.5202520252025202, | |
| "grad_norm": 9.155281066894531, | |
| "learning_rate": 7.772102811810689e-07, | |
| "loss": 0.122, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.5346534653465347, | |
| "grad_norm": 5.481142997741699, | |
| "learning_rate": 7.329388888610384e-07, | |
| "loss": 0.0978, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 2.549054905490549, | |
| "grad_norm": 7.971097946166992, | |
| "learning_rate": 6.898657830706367e-07, | |
| "loss": 0.1157, | |
| "step": 5664 | |
| }, | |
| { | |
| "epoch": 2.5634563456345636, | |
| "grad_norm": 11.405919075012207, | |
| "learning_rate": 6.492928309381779e-07, | |
| "loss": 0.0979, | |
| "step": 5696 | |
| }, | |
| { | |
| "epoch": 2.577857785778578, | |
| "grad_norm": 13.61896800994873, | |
| "learning_rate": 6.08613879617217e-07, | |
| "loss": 0.1041, | |
| "step": 5728 | |
| }, | |
| { | |
| "epoch": 2.592259225922592, | |
| "grad_norm": 29.495868682861328, | |
| "learning_rate": 5.691681280788214e-07, | |
| "loss": 0.0992, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.6066606660666065, | |
| "grad_norm": 11.616131782531738, | |
| "learning_rate": 5.309666535753417e-07, | |
| "loss": 0.1051, | |
| "step": 5792 | |
| }, | |
| { | |
| "epoch": 2.621062106210621, | |
| "grad_norm": 8.599799156188965, | |
| "learning_rate": 4.940201839382114e-07, | |
| "loss": 0.1091, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 2.6354635463546354, | |
| "grad_norm": 12.70768928527832, | |
| "learning_rate": 4.5833909456532764e-07, | |
| "loss": 0.1111, | |
| "step": 5856 | |
| }, | |
| { | |
| "epoch": 2.64986498649865, | |
| "grad_norm": 11.202815055847168, | |
| "learning_rate": 4.2393340550740844e-07, | |
| "loss": 0.1046, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 2.6642664266426643, | |
| "grad_norm": 11.233574867248535, | |
| "learning_rate": 3.908127786541427e-07, | |
| "loss": 0.1022, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.678667866786679, | |
| "grad_norm": 9.141092300415039, | |
| "learning_rate": 3.589865150209071e-07, | |
| "loss": 0.0984, | |
| "step": 5952 | |
| }, | |
| { | |
| "epoch": 2.693069306930693, | |
| "grad_norm": 15.323395729064941, | |
| "learning_rate": 3.2846355213683456e-07, | |
| "loss": 0.1279, | |
| "step": 5984 | |
| }, | |
| { | |
| "epoch": 2.7074707470747077, | |
| "grad_norm": 10.427877426147461, | |
| "learning_rate": 2.9925246153496067e-07, | |
| "loss": 0.1169, | |
| "step": 6016 | |
| }, | |
| { | |
| "epoch": 2.7218721872187217, | |
| "grad_norm": 12.140647888183594, | |
| "learning_rate": 2.713614463451364e-07, | |
| "loss": 0.1267, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 2.736273627362736, | |
| "grad_norm": 13.221187591552734, | |
| "learning_rate": 2.4479833899041183e-07, | |
| "loss": 0.1198, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.7506750675067506, | |
| "grad_norm": 5.667657852172852, | |
| "learning_rate": 2.19570598987513e-07, | |
| "loss": 0.1312, | |
| "step": 6112 | |
| }, | |
| { | |
| "epoch": 2.765076507650765, | |
| "grad_norm": 0.735275149345398, | |
| "learning_rate": 1.9568531085204067e-07, | |
| "loss": 0.0699, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 2.7794779477947795, | |
| "grad_norm": 4.510382652282715, | |
| "learning_rate": 1.731491821089848e-07, | |
| "loss": 0.0965, | |
| "step": 6176 | |
| }, | |
| { | |
| "epoch": 2.793879387938794, | |
| "grad_norm": 13.006752967834473, | |
| "learning_rate": 1.5196854140909545e-07, | |
| "loss": 0.1026, | |
| "step": 6208 | |
| }, | |
| { | |
| "epoch": 2.8082808280828084, | |
| "grad_norm": 10.59677791595459, | |
| "learning_rate": 1.321493367516574e-07, | |
| "loss": 0.1093, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.8226822682268224, | |
| "grad_norm": 8.108485221862793, | |
| "learning_rate": 1.136971338141596e-07, | |
| "loss": 0.1191, | |
| "step": 6272 | |
| }, | |
| { | |
| "epoch": 2.8370837083708373, | |
| "grad_norm": 11.026812553405762, | |
| "learning_rate": 9.661711438932686e-08, | |
| "loss": 0.1008, | |
| "step": 6304 | |
| }, | |
| { | |
| "epoch": 2.8514851485148514, | |
| "grad_norm": 7.0519890785217285, | |
| "learning_rate": 8.09140749299564e-08, | |
| "loss": 0.1386, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 2.865886588658866, | |
| "grad_norm": 27.898834228515625, | |
| "learning_rate": 6.659242520196562e-08, | |
| "loss": 0.103, | |
| "step": 6368 | |
| }, | |
| { | |
| "epoch": 2.8802880288028803, | |
| "grad_norm": 7.882510185241699, | |
| "learning_rate": 5.365618704603392e-08, | |
| "loss": 0.0812, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.8946894689468947, | |
| "grad_norm": 15.030416488647461, | |
| "learning_rate": 4.2108993248173855e-08, | |
| "loss": 0.102, | |
| "step": 6432 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 11.23343276977539, | |
| "learning_rate": 3.195408651956944e-08, | |
| "loss": 0.0976, | |
| "step": 6464 | |
| }, | |
| { | |
| "epoch": 2.9234923492349236, | |
| "grad_norm": 6.837776184082031, | |
| "learning_rate": 2.3194318585945673e-08, | |
| "loss": 0.0938, | |
| "step": 6496 | |
| }, | |
| { | |
| "epoch": 2.937893789378938, | |
| "grad_norm": 9.184088706970215, | |
| "learning_rate": 1.583214938674138e-08, | |
| "loss": 0.1087, | |
| "step": 6528 | |
| }, | |
| { | |
| "epoch": 2.952295229522952, | |
| "grad_norm": 7.366724967956543, | |
| "learning_rate": 1.0034770609533285e-08, | |
| "loss": 0.1139, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.9666966696669665, | |
| "grad_norm": 10.501226425170898, | |
| "learning_rate": 5.429794877803151e-09, | |
| "loss": 0.0928, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 2.981098109810981, | |
| "grad_norm": 7.6273908615112305, | |
| "learning_rate": 2.2274065574556804e-09, | |
| "loss": 0.1124, | |
| "step": 6624 | |
| }, | |
| { | |
| "epoch": 2.9954995499549955, | |
| "grad_norm": 5.487658977508545, | |
| "learning_rate": 4.2850495100610344e-10, | |
| "loss": 0.1158, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 6666, | |
| "total_flos": 9491636163499776.0, | |
| "train_loss": 0.3371730904088448, | |
| "train_runtime": 4274.2917, | |
| "train_samples_per_second": 1.56, | |
| "train_steps_per_second": 1.56 | |
| } | |
| ], | |
| "logging_steps": 32, | |
| "max_steps": 6666, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9491636163499776.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |