{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.3535169785169785,
  "eval_steps": 500,
  "global_step": 500000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 6.2499999999999995e-06,
      "loss": 7.4049,
      "step": 500
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2499999999999999e-05,
      "loss": 4.7791,
      "step": 1000
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.875e-05,
      "loss": 3.8115,
      "step": 1500
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.4999999999999998e-05,
      "loss": 3.3298,
      "step": 2000
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-05,
      "loss": 3.0365,
      "step": 2500
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.75e-05,
      "loss": 2.8445,
      "step": 3000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.375e-05,
      "loss": 2.7101,
      "step": 3500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9999999999999996e-05,
      "loss": 2.6043,
      "step": 4000
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.625e-05,
      "loss": 2.5286,
      "step": 4500
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.25e-05,
      "loss": 2.467,
      "step": 5000
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.874999999999999e-05,
      "loss": 2.4192,
      "step": 5500
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.5e-05,
      "loss": 2.3785,
      "step": 6000
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.124999999999998e-05,
      "loss": 2.3456,
      "step": 6500
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.75e-05,
      "loss": 2.3183,
      "step": 7000
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.374999999999999e-05,
      "loss": 2.3005,
      "step": 7500
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.999999999999999e-05,
      "loss": 2.2808,
      "step": 8000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00010625,
      "loss": 2.2722,
      "step": 8500
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0001125,
      "loss": 2.2577,
      "step": 9000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00011874999999999999,
      "loss": 2.2479,
      "step": 9500
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.000125,
      "loss": 2.2452,
      "step": 10000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00013125,
      "loss": 2.2419,
      "step": 10500
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00013749999999999998,
      "loss": 2.2345,
      "step": 11000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00014375,
      "loss": 2.2276,
      "step": 11500
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00015,
      "loss": 2.2285,
      "step": 12000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00015625,
      "loss": 2.2178,
      "step": 12500
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00016249999999999997,
      "loss": 2.2214,
      "step": 13000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00016874999999999998,
      "loss": 2.2269,
      "step": 13500
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.000175,
      "loss": 2.2322,
      "step": 14000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00018124999999999996,
      "loss": 2.2292,
      "step": 14500
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00018749999999999998,
      "loss": 2.2274,
      "step": 15000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019375,
      "loss": 2.232,
      "step": 15500
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019999999999999998,
      "loss": 2.2316,
      "step": 16000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00020624999999999997,
      "loss": 2.238,
      "step": 16500
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002125,
      "loss": 2.2426,
      "step": 17000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00021874999999999998,
      "loss": 2.2408,
      "step": 17500
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.000225,
      "loss": 2.2456,
      "step": 18000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00023124999999999998,
      "loss": 2.2473,
      "step": 18500
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00023749999999999997,
      "loss": 2.2549,
      "step": 19000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00024375,
      "loss": 2.2542,
      "step": 19500
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00025,
      "loss": 2.2604,
      "step": 20000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00025624999999999997,
      "loss": 2.2626,
      "step": 20500
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002625,
      "loss": 2.2694,
      "step": 21000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00026875,
      "loss": 2.2724,
      "step": 21500
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00027499999999999996,
      "loss": 2.2722,
      "step": 22000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00028125,
      "loss": 2.2807,
      "step": 22500
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002875,
      "loss": 2.2846,
      "step": 23000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00029374999999999996,
      "loss": 2.292,
      "step": 23500
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0003,
      "loss": 2.297,
      "step": 24000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002996848739495798,
      "loss": 2.3038,
      "step": 24500
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00029936974789915966,
      "loss": 2.2998,
      "step": 25000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002990546218487395,
      "loss": 2.2977,
      "step": 25500
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002987394957983193,
      "loss": 2.2925,
      "step": 26000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002984243697478991,
      "loss": 2.2943,
      "step": 26500
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.000298109243697479,
      "loss": 2.2899,
      "step": 27000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002977941176470588,
      "loss": 2.2899,
      "step": 27500
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002974789915966386,
      "loss": 2.2806,
      "step": 28000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002971638655462185,
      "loss": 2.2823,
      "step": 28500
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002968487394957983,
      "loss": 2.2795,
      "step": 29000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00029653361344537813,
      "loss": 2.2736,
      "step": 29500
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00029621848739495795,
      "loss": 2.2704,
      "step": 30000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002959033613445378,
      "loss": 2.269,
      "step": 30500
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00029558823529411763,
      "loss": 2.2601,
      "step": 31000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00029527310924369745,
      "loss": 2.2674,
      "step": 31500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00029495798319327727,
      "loss": 2.2575,
      "step": 32000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002946428571428571,
      "loss": 2.2583,
      "step": 32500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00029432773109243696,
      "loss": 2.2494,
      "step": 33000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002940126050420168,
      "loss": 2.2412,
      "step": 33500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002936974789915966,
      "loss": 2.2453,
      "step": 34000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002933823529411764,
      "loss": 2.2401,
      "step": 34500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002930672268907563,
      "loss": 2.2375,
      "step": 35000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002927521008403361,
      "loss": 2.2302,
      "step": 35500
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002924369747899159,
      "loss": 2.2275,
      "step": 36000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002921218487394958,
      "loss": 2.223,
      "step": 36500
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002918067226890756,
      "loss": 2.2252,
      "step": 37000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002914915966386554,
      "loss": 2.2238,
      "step": 37500
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00029117647058823524,
      "loss": 2.2206,
      "step": 38000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002908613445378151,
      "loss": 2.2162,
      "step": 38500
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00029054621848739493,
      "loss": 2.2141,
      "step": 39000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00029023109243697475,
      "loss": 2.2215,
      "step": 39500
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002899159663865546,
      "loss": 2.2107,
      "step": 40000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00028960084033613444,
      "loss": 2.2125,
      "step": 40500
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00028928571428571425,
      "loss": 2.2086,
      "step": 41000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00028897058823529407,
      "loss": 2.2097,
      "step": 41500
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00028865546218487394,
      "loss": 2.1996,
      "step": 42000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00028834033613445376,
      "loss": 2.1972,
      "step": 42500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002880252100840336,
      "loss": 2.1906,
      "step": 43000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00028771008403361345,
      "loss": 2.2015,
      "step": 43500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00028739495798319327,
      "loss": 2.1927,
      "step": 44000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002870798319327731,
      "loss": 2.1889,
      "step": 44500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00028676470588235296,
      "loss": 2.1904,
      "step": 45000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002864495798319328,
      "loss": 2.1809,
      "step": 45500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002861344537815126,
      "loss": 2.1777,
      "step": 46000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002858193277310924,
      "loss": 2.1772,
      "step": 46500
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002855042016806722,
      "loss": 2.1714,
      "step": 47000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00028518907563025204,
      "loss": 2.171,
      "step": 47500
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002848739495798319,
      "loss": 2.1814,
      "step": 48000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00028455882352941173,
      "loss": 2.1713,
      "step": 48500
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00028424369747899155,
      "loss": 2.168,
      "step": 49000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00028392857142857137,
      "loss": 2.1635,
      "step": 49500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028361344537815124,
      "loss": 2.1689,
      "step": 50000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028329831932773106,
      "loss": 2.1614,
      "step": 50500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002829831932773109,
      "loss": 2.1594,
      "step": 51000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028266806722689075,
      "loss": 2.1585,
      "step": 51500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028235294117647056,
      "loss": 2.1639,
      "step": 52000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002820378151260504,
      "loss": 2.1574,
      "step": 52500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028172268907563025,
      "loss": 2.1512,
      "step": 53000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00028140756302521007,
      "loss": 2.1571,
      "step": 53500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002810924369747899,
      "loss": 2.1513,
      "step": 54000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002807773109243697,
      "loss": 2.1503,
      "step": 54500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002804621848739496,
      "loss": 2.1462,
      "step": 55000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002801470588235294,
      "loss": 2.1449,
      "step": 55500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002798319327731092,
      "loss": 2.1383,
      "step": 56000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002795168067226891,
      "loss": 2.1431,
      "step": 56500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002792016806722689,
      "loss": 2.1349,
      "step": 57000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002788865546218487,
      "loss": 2.1423,
      "step": 57500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00027857142857142854,
      "loss": 2.1375,
      "step": 58000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002782563025210084,
      "loss": 2.1261,
      "step": 58500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002779411764705882,
      "loss": 2.1277,
      "step": 59000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00027762605042016804,
      "loss": 2.1204,
      "step": 59500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002773109243697479,
      "loss": 2.1277,
      "step": 60000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00027699579831932773,
      "loss": 2.1246,
      "step": 60500
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00027668067226890755,
      "loss": 2.1207,
      "step": 61000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00027636554621848737,
      "loss": 2.1161,
      "step": 61500
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002760504201680672,
      "loss": 2.1198,
      "step": 62000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.000275735294117647,
      "loss": 2.1122,
      "step": 62500
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00027542016806722687,
      "loss": 2.1113,
      "step": 63000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002751050420168067,
      "loss": 2.1164,
      "step": 63500
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002747899159663865,
      "loss": 2.1096,
      "step": 64000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002744747899159664,
      "loss": 2.106,
      "step": 64500
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002741596638655462,
      "loss": 2.1015,
      "step": 65000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.000273844537815126,
      "loss": 2.1041,
      "step": 65500
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00027352941176470583,
      "loss": 2.0991,
      "step": 66000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002732142857142857,
      "loss": 2.0956,
      "step": 66500
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002728991596638655,
      "loss": 2.0945,
      "step": 67000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00027258403361344534,
      "loss": 2.0958,
      "step": 67500
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002722689075630252,
      "loss": 2.1115,
      "step": 68000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.000271953781512605,
      "loss": 2.1205,
      "step": 68500
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027163865546218484,
      "loss": 2.1098,
      "step": 69000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027132352941176466,
      "loss": 2.1046,
      "step": 69500
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027100840336134453,
      "loss": 2.1013,
      "step": 70000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027069327731092435,
      "loss": 2.0989,
      "step": 70500
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027037815126050417,
      "loss": 2.0927,
      "step": 71000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00027006302521008404,
      "loss": 2.0827,
      "step": 71500
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00026974789915966386,
      "loss": 2.0851,
      "step": 72000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002694327731092437,
      "loss": 2.0841,
      "step": 72500
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002691176470588235,
      "loss": 2.0816,
      "step": 73000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00026880252100840336,
      "loss": 2.0761,
      "step": 73500
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002684873949579832,
      "loss": 2.0745,
      "step": 74000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.000268172268907563,
      "loss": 2.0762,
      "step": 74500
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00026785714285714287,
      "loss": 2.0704,
      "step": 75000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002675420168067227,
      "loss": 2.0737,
      "step": 75500
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002672268907563025,
      "loss": 2.0687,
      "step": 76000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002669117647058823,
      "loss": 2.0743,
      "step": 76500
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00026659663865546214,
      "loss": 2.0758,
      "step": 77000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00026628151260504196,
      "loss": 2.0677,
      "step": 77500
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00026596638655462183,
      "loss": 2.0643,
      "step": 78000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00026565126050420165,
      "loss": 2.0588,
      "step": 78500
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00026533613445378146,
      "loss": 2.0676,
      "step": 79000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00026502100840336134,
      "loss": 2.065,
      "step": 79500
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00026470588235294115,
      "loss": 2.064,
      "step": 80000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00026439075630252097,
      "loss": 2.0579,
      "step": 80500
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002640756302521008,
      "loss": 2.0614,
      "step": 81000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00026376050420168066,
      "loss": 2.0664,
      "step": 81500
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002634453781512605,
      "loss": 2.0648,
      "step": 82000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002631302521008403,
      "loss": 2.0564,
      "step": 82500
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00026281512605042017,
      "loss": 2.0569,
      "step": 83000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002625,
      "loss": 2.0517,
      "step": 83500
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002621848739495798,
      "loss": 2.0453,
      "step": 84000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002618697478991596,
      "loss": 2.0447,
      "step": 84500
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002615546218487395,
      "loss": 2.0431,
      "step": 85000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002612394957983193,
      "loss": 2.0414,
      "step": 85500
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002609243697478991,
      "loss": 2.0381,
      "step": 86000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.000260609243697479,
      "loss": 2.0392,
      "step": 86500
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002602941176470588,
      "loss": 2.0317,
      "step": 87000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025997899159663863,
      "loss": 2.0338,
      "step": 87500
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025966386554621845,
      "loss": 2.0348,
      "step": 88000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002593487394957983,
      "loss": 2.0349,
      "step": 88500
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025903361344537814,
      "loss": 2.0295,
      "step": 89000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025871848739495796,
      "loss": 2.0253,
      "step": 89500
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025840336134453783,
      "loss": 2.0272,
      "step": 90000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00025808823529411764,
      "loss": 2.0273,
      "step": 90500
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00025777310924369746,
      "loss": 2.0295,
      "step": 91000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002574579831932773,
      "loss": 2.0284,
      "step": 91500
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002571428571428571,
      "loss": 2.0335,
      "step": 92000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002568277310924369,
      "loss": 2.032,
      "step": 92500
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002565126050420168,
      "loss": 2.0256,
      "step": 93000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002561974789915966,
      "loss": 2.0278,
      "step": 93500
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002558823529411764,
      "loss": 2.0226,
      "step": 94000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002555672268907563,
      "loss": 2.0186,
      "step": 94500
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002552521008403361,
      "loss": 2.0111,
      "step": 95000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00025493697478991593,
      "loss": 2.0139,
      "step": 95500
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00025462184873949575,
      "loss": 2.0092,
      "step": 96000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002543067226890756,
      "loss": 2.0139,
      "step": 96500
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00025399159663865543,
      "loss": 2.0017,
      "step": 97000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00025367647058823525,
      "loss": 2.0061,
      "step": 97500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002533613445378151,
      "loss": 2.0049,
      "step": 98000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00025304621848739494,
      "loss": 2.0054,
      "step": 98500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00025273109243697476,
      "loss": 2.0045,
      "step": 99000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002524159663865546,
      "loss": 2.0043,
      "step": 99500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00025210084033613445,
      "loss": 2.0007,
      "step": 100000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00025178571428571426,
      "loss": 2.0047,
      "step": 100500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002514705882352941,
      "loss": 2.0061,
      "step": 101000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00025115546218487395,
      "loss": 2.0006,
      "step": 101500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00025084033613445377,
      "loss": 1.9995,
      "step": 102000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002505252100840336,
      "loss": 1.9997,
      "step": 102500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00025021008403361346,
      "loss": 1.9942,
      "step": 103000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002498949579831933,
      "loss": 1.9951,
      "step": 103500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002495798319327731,
      "loss": 1.9872,
      "step": 104000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002492647058823529,
      "loss": 1.9908,
      "step": 104500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002489495798319328,
      "loss": 1.9933,
      "step": 105000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002486344537815126,
      "loss": 1.9884,
      "step": 105500
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002483193277310924,
      "loss": 1.9885,
      "step": 106000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00024800420168067224,
      "loss": 1.9881,
      "step": 106500
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00024768907563025205,
      "loss": 1.9842,
      "step": 107000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00024737394957983187,
      "loss": 1.9816,
      "step": 107500
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00024705882352941174,
      "loss": 1.9772,
      "step": 108000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00024674369747899156,
      "loss": 1.9766,
      "step": 108500
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002464285714285714,
      "loss": 1.9795,
      "step": 109000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00024611344537815125,
      "loss": 1.9754,
      "step": 109500
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00024579831932773107,
      "loss": 1.9751,
      "step": 110000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002454831932773109,
      "loss": 1.9757,
      "step": 110500
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00024516806722689076,
      "loss": 1.9755,
      "step": 111000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002448529411764706,
      "loss": 1.9753,
      "step": 111500
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002445378151260504,
      "loss": 1.9657,
      "step": 112000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002442226890756302,
      "loss": 1.9663,
      "step": 112500
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00024390756302521005,
      "loss": 1.9682,
      "step": 113000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002435924369747899,
      "loss": 1.971,
      "step": 113500
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00024327731092436971,
      "loss": 1.9687,
      "step": 114000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00024296218487394956,
      "loss": 1.9826,
      "step": 114500
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002426470588235294,
      "loss": 1.9643,
      "step": 115000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00024233193277310922,
      "loss": 1.9708,
      "step": 115500
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00024201680672268907,
      "loss": 1.9628,
      "step": 116000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00024170168067226888,
      "loss": 1.9633,
      "step": 116500
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00024138655462184873,
      "loss": 1.9607,
      "step": 117000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00024107142857142857,
      "loss": 1.9574,
      "step": 117500
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002407563025210084,
      "loss": 1.9611,
      "step": 118000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00024044117647058823,
      "loss": 1.9575,
      "step": 118500
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00024012605042016805,
      "loss": 1.9538,
      "step": 119000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002398109243697479,
      "loss": 1.9573,
      "step": 119500
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00023949579831932771,
      "loss": 1.9601,
      "step": 120000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00023918067226890756,
      "loss": 1.9539,
      "step": 120500
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00023886554621848735,
      "loss": 1.9532,
      "step": 121000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002385504201680672,
      "loss": 1.951,
      "step": 121500
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.000238235294117647,
      "loss": 1.9554,
      "step": 122000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00023792016806722686,
      "loss": 1.9472,
      "step": 122500
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002376050420168067,
      "loss": 1.9462,
      "step": 123000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00023728991596638652,
      "loss": 1.9524,
      "step": 123500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023697478991596636,
      "loss": 1.9525,
      "step": 124000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023665966386554618,
      "loss": 1.9444,
      "step": 124500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023634453781512602,
      "loss": 1.9404,
      "step": 125000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023602941176470587,
      "loss": 1.9431,
      "step": 125500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023571428571428569,
      "loss": 1.943,
      "step": 126000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023539915966386553,
      "loss": 1.9404,
      "step": 126500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00023508403361344535,
      "loss": 1.9447,
      "step": 127000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002347689075630252,
      "loss": 1.9425,
      "step": 127500
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.000234453781512605,
      "loss": 1.9376,
      "step": 128000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00023413865546218485,
      "loss": 1.9374,
      "step": 128500
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002338235294117647,
      "loss": 1.9373,
      "step": 129000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00023350840336134452,
      "loss": 1.9364,
      "step": 129500
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00023319327731092436,
      "loss": 1.9344,
      "step": 130000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00023287815126050418,
      "loss": 1.9299,
      "step": 130500
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00023256302521008402,
      "loss": 1.9243,
      "step": 131000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00023224789915966384,
      "loss": 1.93,
      "step": 131500
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00023193277310924368,
      "loss": 1.925,
      "step": 132000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00023161764705882353,
      "loss": 1.9256,
      "step": 132500
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00023130252100840335,
      "loss": 1.9334,
      "step": 133000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002309873949579832,
      "loss": 1.9266,
      "step": 133500
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.000230672268907563,
      "loss": 1.9338,
      "step": 134000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00023035714285714285,
      "loss": 1.9258,
      "step": 134500
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00023004201680672267,
      "loss": 1.9253,
      "step": 135000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00022972689075630252,
      "loss": 1.9259,
      "step": 135500
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002294117647058823,
      "loss": 1.9509,
      "step": 136000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00022909663865546215,
      "loss": 1.9366,
      "step": 136500
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.000228781512605042,
      "loss": 1.9323,
      "step": 137000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002284663865546218,
      "loss": 1.9298,
      "step": 137500
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00022815126050420166,
      "loss": 1.9611,
      "step": 138000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00022783613445378147,
      "loss": 1.9599,
      "step": 138500
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00022752100840336132,
      "loss": 1.9537,
      "step": 139000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00022720588235294114,
      "loss": 1.9308,
      "step": 139500
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00022689075630252098,
      "loss": 1.9208,
      "step": 140000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00022657563025210083,
      "loss": 1.9233,
      "step": 140500
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00022626050420168064,
      "loss": 1.9233,
      "step": 141000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002259453781512605,
      "loss": 1.927,
      "step": 141500
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002256302521008403,
      "loss": 1.9331,
      "step": 142000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00022531512605042015,
      "loss": 1.9358,
      "step": 142500
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.000225,
      "loss": 1.9415,
      "step": 143000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0002246848739495798,
      "loss": 1.9427,
      "step": 143500
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00022436974789915966,
      "loss": 1.9216,
      "step": 144000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00022405462184873947,
      "loss": 1.9196,
      "step": 144500
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00022373949579831932,
      "loss": 1.9109,
      "step": 145000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00022342436974789914,
      "loss": 1.9166,
      "step": 145500
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022310924369747898,
      "loss": 1.9231,
      "step": 146000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022279411764705882,
      "loss": 1.9091,
      "step": 146500
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022247899159663864,
      "loss": 1.9096,
      "step": 147000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022216386554621849,
      "loss": 1.9107,
      "step": 147500
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002218487394957983,
      "loss": 1.908,
      "step": 148000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022153361344537815,
      "loss": 1.9052,
      "step": 148500
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00022121848739495797,
      "loss": 1.9058,
      "step": 149000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002209033613445378,
      "loss": 1.9043,
      "step": 149500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00022058823529411765,
      "loss": 1.9049,
      "step": 150000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00022027310924369745,
      "loss": 1.9028,
      "step": 150500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002199579831932773,
      "loss": 1.9034,
      "step": 151000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002196428571428571,
      "loss": 1.8976,
      "step": 151500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00021932773109243695,
      "loss": 1.8957,
      "step": 152000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00021901260504201677,
      "loss": 1.895,
      "step": 152500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00021869747899159661,
      "loss": 1.8944,
      "step": 153000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021838235294117643,
      "loss": 1.8904,
      "step": 153500
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021806722689075628,
      "loss": 1.8939,
      "step": 154000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021775210084033612,
      "loss": 1.8921,
      "step": 154500
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021743697478991594,
      "loss": 1.8873,
      "step": 155000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021712184873949578,
      "loss": 1.8907,
      "step": 155500
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002168067226890756,
      "loss": 1.8898,
      "step": 156000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00021649159663865544,
      "loss": 1.8826,
      "step": 156500
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00021617647058823526,
      "loss": 1.8896,
      "step": 157000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002158613445378151,
      "loss": 1.8836,
      "step": 157500
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00021554621848739495,
      "loss": 1.9006,
      "step": 158000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00021523109243697477,
      "loss": 1.8949,
      "step": 158500
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002149159663865546,
      "loss": 1.8931,
      "step": 159000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00021460084033613443,
      "loss": 1.8886,
      "step": 159500
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00021428571428571427,
      "loss": 1.892,
      "step": 160000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002139705882352941,
      "loss": 1.8935,
      "step": 160500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00021365546218487394,
      "loss": 1.8977,
      "step": 161000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00021334033613445378,
      "loss": 1.8891,
      "step": 161500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002130252100840336,
      "loss": 1.8846,
      "step": 162000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00021271008403361344,
      "loss": 1.8816,
      "step": 162500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00021239495798319326,
      "loss": 1.8791,
      "step": 163000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002120798319327731,
      "loss": 1.9027,
      "step": 163500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00021176470588235295,
      "loss": 1.8873,
      "step": 164000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00021144957983193277,
      "loss": 1.8795,
      "step": 164500
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002111344537815126,
      "loss": 1.8775,
      "step": 165000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002108193277310924,
      "loss": 1.8799,
      "step": 165500
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00021050420168067225,
      "loss": 1.8771,
      "step": 166000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00021018907563025206,
      "loss": 1.8797,
      "step": 166500
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002098739495798319,
      "loss": 1.8759,
      "step": 167000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00020955882352941173,
      "loss": 1.8742,
      "step": 167500
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00020924369747899157,
      "loss": 1.8691,
      "step": 168000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002089285714285714,
      "loss": 1.8702,
      "step": 168500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00020861344537815123,
      "loss": 1.8689,
      "step": 169000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00020829831932773108,
      "loss": 1.8701,
      "step": 169500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002079831932773109,
      "loss": 1.8695,
      "step": 170000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00020766806722689074,
      "loss": 1.8706,
      "step": 170500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00020735294117647056,
      "loss": 1.8935,
      "step": 171000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002070378151260504,
      "loss": 1.9075,
      "step": 171500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00020672268907563025,
      "loss": 1.8929,
      "step": 172000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00020640756302521006,
      "loss": 1.8795,
      "step": 172500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002060924369747899,
      "loss": 1.8845,
      "step": 173000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00020577731092436973,
      "loss": 1.8762,
      "step": 173500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00020546218487394957,
      "loss": 1.8745,
      "step": 174000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002051470588235294,
      "loss": 1.8751,
      "step": 174500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00020483193277310923,
      "loss": 1.8746,
      "step": 175000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00020451680672268908,
      "loss": 1.8614,
      "step": 175500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002042016806722689,
      "loss": 1.8634,
      "step": 176000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00020388655462184874,
      "loss": 1.8711,
      "step": 176500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00020357142857142856,
      "loss": 1.8648,
      "step": 177000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002032563025210084,
      "loss": 1.8615,
      "step": 177500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00020294117647058822,
      "loss": 1.8661,
      "step": 178000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00020262605042016806,
      "loss": 1.8616,
      "step": 178500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002023109243697479,
      "loss": 1.8601,
      "step": 179000
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020199579831932772,
      "loss": 1.865,
      "step": 179500
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020168067226890757,
      "loss": 1.8664,
      "step": 180000
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020136554621848736,
      "loss": 1.8562,
      "step": 180500
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0002010504201680672,
      "loss": 1.8589,
      "step": 181000
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020073529411764702,
      "loss": 1.8583,
      "step": 181500
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020042016806722687,
      "loss": 1.8623,
      "step": 182000
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00020010504201680668,
      "loss": 1.8561,
      "step": 182500
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00019978991596638653,
      "loss": 1.8537,
      "step": 183000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00019947478991596637,
      "loss": 1.8622,
      "step": 183500
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001991596638655462,
      "loss": 1.8568,
      "step": 184000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00019884453781512603,
      "loss": 1.8543,
      "step": 184500
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00019852941176470585,
      "loss": 1.8528,
      "step": 185000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001982142857142857,
      "loss": 1.8509,
      "step": 185500
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001978991596638655,
      "loss": 1.8489,
      "step": 186000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00019758403361344536,
      "loss": 1.8483,
      "step": 186500
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0001972689075630252,
      "loss": 1.8487,
      "step": 187000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00019695378151260502,
      "loss": 1.8416,
      "step": 187500
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00019663865546218486,
      "loss": 1.8497,
      "step": 188000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00019632352941176468,
      "loss": 1.849,
      "step": 188500
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00019600840336134453,
      "loss": 1.8466,
      "step": 189000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00019569327731092434,
      "loss": 1.8456,
      "step": 189500
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0001953781512605042,
      "loss": 1.8453,
      "step": 190000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00019506302521008403,
      "loss": 1.8418,
      "step": 190500
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00019474789915966385,
      "loss": 1.8429,
      "step": 191000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0001944327731092437,
      "loss": 1.8453,
      "step": 191500
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0001941176470588235,
      "loss": 1.8458,
      "step": 192000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00019380252100840336,
      "loss": 1.842,
      "step": 192500
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0001934873949579832,
      "loss": 1.8381,
      "step": 193000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00019317226890756302,
      "loss": 1.8374,
      "step": 193500
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019285714285714286,
      "loss": 1.8359,
      "step": 194000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019254201680672268,
      "loss": 1.8336,
      "step": 194500
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019222689075630253,
      "loss": 1.8365,
      "step": 195000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019191176470588232,
      "loss": 1.8391,
      "step": 195500
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019159663865546216,
      "loss": 1.8431,
      "step": 196000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019128151260504198,
      "loss": 1.8361,
      "step": 196500
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019096638655462182,
      "loss": 1.8583,
      "step": 197000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00019065126050420164,
      "loss": 1.8656,
      "step": 197500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00019033613445378148,
      "loss": 1.8508,
      "step": 198000
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00019002100840336133,
      "loss": 1.8533,
      "step": 198500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00018970588235294115,
      "loss": 1.8416,
      "step": 199000
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.000189390756302521,
      "loss": 1.8503,
      "step": 199500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001890756302521008,
      "loss": 1.844,
      "step": 200000
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00018876050420168065,
      "loss": 1.8389,
      "step": 200500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001884453781512605,
      "loss": 1.836,
      "step": 201000
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018813025210084031,
      "loss": 1.8354,
      "step": 201500
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018781512605042016,
      "loss": 1.833,
      "step": 202000
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018749999999999998,
      "loss": 1.8308,
      "step": 202500
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018718487394957982,
      "loss": 1.8314,
      "step": 203000
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018686974789915964,
      "loss": 1.83,
      "step": 203500
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018655462184873948,
      "loss": 1.8318,
      "step": 204000
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018623949579831933,
      "loss": 1.8313,
      "step": 204500
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00018592436974789915,
      "loss": 1.8234,
      "step": 205000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.000185609243697479,
      "loss": 1.8286,
      "step": 205500
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0001852941176470588,
      "loss": 1.8246,
      "step": 206000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00018497899159663865,
      "loss": 1.8213,
      "step": 206500
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00018466386554621847,
      "loss": 1.8237,
      "step": 207000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00018434873949579831,
      "loss": 1.8247,
      "step": 207500
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00018403361344537816,
      "loss": 1.8215,
      "step": 208000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00018371848739495798,
      "loss": 1.8174,
      "step": 208500
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018340336134453782,
      "loss": 1.8215,
      "step": 209000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018308823529411764,
      "loss": 1.8168,
      "step": 209500
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018277310924369746,
      "loss": 1.8186,
      "step": 210000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018245798319327727,
      "loss": 1.8229,
      "step": 210500
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018214285714285712,
      "loss": 1.8165,
      "step": 211000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018182773109243693,
      "loss": 1.8173,
      "step": 211500
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00018151260504201678,
      "loss": 1.8115,
      "step": 212000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00018119747899159662,
      "loss": 1.8157,
      "step": 212500
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00018088235294117644,
      "loss": 1.813,
      "step": 213000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00018056722689075629,
      "loss": 1.8126,
      "step": 213500
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0001802521008403361,
      "loss": 1.808,
      "step": 214000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017993697478991595,
      "loss": 1.8116,
      "step": 214500
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017962184873949577,
      "loss": 1.8165,
      "step": 215000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0001793067226890756,
      "loss": 1.807,
      "step": 215500
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017899159663865545,
      "loss": 1.8051,
      "step": 216000
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017867647058823527,
      "loss": 1.8081,
      "step": 216500
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017836134453781512,
      "loss": 1.8084,
      "step": 217000
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017804621848739493,
      "loss": 1.811,
      "step": 217500
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017773109243697478,
      "loss": 1.8068,
      "step": 218000
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0001774159663865546,
      "loss": 1.805,
      "step": 218500
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017710084033613444,
      "loss": 1.8042,
      "step": 219000
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017678571428571428,
      "loss": 1.8004,
      "step": 219500
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001764705882352941,
      "loss": 1.8028,
      "step": 220000
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017615546218487395,
      "loss": 1.8111,
      "step": 220500
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017584033613445376,
      "loss": 1.8076,
      "step": 221000
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001755252100840336,
      "loss": 1.8017,
      "step": 221500
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017521008403361345,
      "loss": 1.802,
      "step": 222000
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017489495798319327,
      "loss": 1.803,
      "step": 222500
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017457983193277312,
      "loss": 1.8023,
      "step": 223000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00017426470588235293,
      "loss": 1.7978,
      "step": 223500
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00017394957983193278,
      "loss": 1.7959,
      "step": 224000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001736344537815126,
      "loss": 1.7971,
      "step": 224500
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001733193277310924,
      "loss": 1.801,
      "step": 225000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00017300420168067223,
      "loss": 1.7987,
      "step": 225500
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00017268907563025207,
      "loss": 1.7975,
      "step": 226000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001723739495798319,
      "loss": 1.7997,
      "step": 226500
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00017205882352941174,
      "loss": 1.7993,
      "step": 227000
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00017174369747899158,
      "loss": 1.8021,
      "step": 227500
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0001714285714285714,
      "loss": 1.795,
      "step": 228000
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00017111344537815124,
      "loss": 1.7957,
      "step": 228500
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00017079831932773106,
      "loss": 1.7967,
      "step": 229000
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0001704831932773109,
      "loss": 1.7951,
      "step": 229500
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00017016806722689075,
      "loss": 1.7953,
      "step": 230000
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016985294117647057,
      "loss": 1.7938,
      "step": 230500
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001695378151260504,
      "loss": 1.7925,
      "step": 231000
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016922268907563023,
      "loss": 1.7944,
      "step": 231500
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016890756302521007,
      "loss": 1.7897,
      "step": 232000
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001685924369747899,
      "loss": 1.7929,
      "step": 232500
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016827731092436974,
      "loss": 1.7916,
      "step": 233000
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016796218487394958,
      "loss": 1.7885,
      "step": 233500
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001676470588235294,
      "loss": 1.7971,
      "step": 234000
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016733193277310924,
      "loss": 1.7871,
      "step": 234500
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016701680672268906,
      "loss": 1.7862,
      "step": 235000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001667016806722689,
      "loss": 1.7858,
      "step": 235500
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016638655462184872,
      "loss": 1.7859,
      "step": 236000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016607142857142857,
      "loss": 1.7864,
      "step": 236500
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001657563025210084,
      "loss": 1.7869,
      "step": 237000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016544117647058823,
      "loss": 1.7816,
      "step": 237500
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016512605042016807,
      "loss": 1.7865,
      "step": 238000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0001648109243697479,
      "loss": 1.7867,
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016449579831932773, | |
| "loss": 1.7868, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016418067226890755, | |
| "loss": 1.7795, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016386554621848737, | |
| "loss": 1.7833, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016355042016806719, | |
| "loss": 1.7885, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016323529411764703, | |
| "loss": 1.7886, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016292016806722688, | |
| "loss": 1.7865, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001626050420168067, | |
| "loss": 1.787, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00016228991596638654, | |
| "loss": 1.7791, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00016197478991596635, | |
| "loss": 1.7786, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001616596638655462, | |
| "loss": 1.7819, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00016134453781512602, | |
| "loss": 1.7766, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00016102941176470586, | |
| "loss": 1.7766, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001607142857142857, | |
| "loss": 1.7809, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00016039915966386552, | |
| "loss": 1.7708, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00016008403361344537, | |
| "loss": 1.7713, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00015976890756302519, | |
| "loss": 1.7773, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00015945378151260503, | |
| "loss": 1.7746, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00015913865546218485, | |
| "loss": 1.7722, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0001588235294117647, | |
| "loss": 1.7707, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00015850840336134454, | |
| "loss": 1.7785, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00015819327731092435, | |
| "loss": 1.7745, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001578781512605042, | |
| "loss": 1.7746, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015756302521008402, | |
| "loss": 1.7706, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015724789915966386, | |
| "loss": 1.7689, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001569327731092437, | |
| "loss": 1.7702, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015661764705882352, | |
| "loss": 1.7716, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015630252100840337, | |
| "loss": 1.7709, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015598739495798318, | |
| "loss": 1.7689, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00015567226890756303, | |
| "loss": 1.7709, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00015535714285714285, | |
| "loss": 1.7666, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001550420168067227, | |
| "loss": 1.768, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00015472689075630254, | |
| "loss": 1.7646, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00015441176470588233, | |
| "loss": 1.7624, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00015409663865546214, | |
| "loss": 1.7639, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.000153781512605042, | |
| "loss": 1.7684, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00015346638655462183, | |
| "loss": 1.7621, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00015315126050420165, | |
| "loss": 1.7632, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001528361344537815, | |
| "loss": 1.7617, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001525210084033613, | |
| "loss": 1.7616, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00015220588235294116, | |
| "loss": 1.7607, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.000151890756302521, | |
| "loss": 1.7652, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00015157563025210082, | |
| "loss": 1.7628, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00015126050420168066, | |
| "loss": 1.7605, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00015094537815126048, | |
| "loss": 1.7624, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00015063025210084032, | |
| "loss": 1.7642, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00015031512605042014, | |
| "loss": 1.7577, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00015, | |
| "loss": 1.7586, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00014968487394957983, | |
| "loss": 1.7582, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00014936974789915965, | |
| "loss": 1.7578, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001490546218487395, | |
| "loss": 1.7638, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001487394957983193, | |
| "loss": 1.7529, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014842436974789916, | |
| "loss": 1.7571, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014810924369747897, | |
| "loss": 1.7603, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014779411764705882, | |
| "loss": 1.7539, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014747899159663863, | |
| "loss": 1.7537, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014716386554621848, | |
| "loss": 1.7532, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0001468487394957983, | |
| "loss": 1.7529, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014653361344537814, | |
| "loss": 1.7504, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00014621848739495796, | |
| "loss": 1.7513, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001459033613445378, | |
| "loss": 1.7492, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00014558823529411762, | |
| "loss": 1.7476, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00014527310924369747, | |
| "loss": 1.7557, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001449579831932773, | |
| "loss": 1.7506, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00014464285714285713, | |
| "loss": 1.7497, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00014432773109243697, | |
| "loss": 1.7499, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001440126050420168, | |
| "loss": 1.7459, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00014369747899159663, | |
| "loss": 1.7445, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00014338235294117648, | |
| "loss": 1.7445, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001430672268907563, | |
| "loss": 1.739, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001427521008403361, | |
| "loss": 1.7409, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00014243697478991596, | |
| "loss": 1.7429, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00014212184873949578, | |
| "loss": 1.741, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00014180672268907562, | |
| "loss": 1.7431, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00014149159663865544, | |
| "loss": 1.745, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00014117647058823528, | |
| "loss": 1.7412, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00014086134453781513, | |
| "loss": 1.7372, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00014054621848739494, | |
| "loss": 1.7369, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0001402310924369748, | |
| "loss": 1.7373, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0001399159663865546, | |
| "loss": 1.7404, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00013960084033613445, | |
| "loss": 1.7424, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013928571428571427, | |
| "loss": 1.7359, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001389705882352941, | |
| "loss": 1.7376, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013865546218487396, | |
| "loss": 1.7362, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013834033613445377, | |
| "loss": 1.7405, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001380252100840336, | |
| "loss": 1.7375, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013771008403361344, | |
| "loss": 1.7355, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013739495798319325, | |
| "loss": 1.733, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001370798319327731, | |
| "loss": 1.7353, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013676470588235292, | |
| "loss": 1.7307, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013644957983193276, | |
| "loss": 1.7362, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0001361344537815126, | |
| "loss": 1.7338, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013581932773109242, | |
| "loss": 1.732, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013550420168067227, | |
| "loss": 1.7284, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013518907563025208, | |
| "loss": 1.7307, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013487394957983193, | |
| "loss": 1.7303, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013455882352941175, | |
| "loss": 1.7277, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0001342436974789916, | |
| "loss": 1.7332, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013392857142857144, | |
| "loss": 1.7255, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013361344537815125, | |
| "loss": 1.7242, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013329831932773107, | |
| "loss": 1.7248, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013298319327731091, | |
| "loss": 1.731, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013266806722689073, | |
| "loss": 1.7324, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013235294117647058, | |
| "loss": 1.7314, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001320378151260504, | |
| "loss": 1.7325, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013172268907563024, | |
| "loss": 1.7259, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013140756302521008, | |
| "loss": 1.7271, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001310924369747899, | |
| "loss": 1.7257, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013077731092436975, | |
| "loss": 1.7267, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013046218487394956, | |
| "loss": 1.7262, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001301470588235294, | |
| "loss": 1.7245, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012983193277310922, | |
| "loss": 1.7252, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012951680672268907, | |
| "loss": 1.7217, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012920168067226891, | |
| "loss": 1.7193, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012888655462184873, | |
| "loss": 1.7184, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012857142857142855, | |
| "loss": 1.7205, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001282563025210084, | |
| "loss": 1.7195, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001279411764705882, | |
| "loss": 1.7195, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012762605042016806, | |
| "loss": 1.7126, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012731092436974787, | |
| "loss": 1.7158, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012699579831932772, | |
| "loss": 1.7137, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012668067226890756, | |
| "loss": 1.7154, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012636554621848738, | |
| "loss": 1.7192, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012605042016806722, | |
| "loss": 1.7155, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012573529411764704, | |
| "loss": 1.7106, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012542016806722689, | |
| "loss": 1.7127, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00012510504201680673, | |
| "loss": 1.7103, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00012478991596638655, | |
| "loss": 1.7125, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0001244747899159664, | |
| "loss": 1.7141, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0001241596638655462, | |
| "loss": 1.7122, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00012384453781512603, | |
| "loss": 1.7121, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00012352941176470587, | |
| "loss": 1.7106, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0001232142857142857, | |
| "loss": 1.708, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00012289915966386553, | |
| "loss": 1.7086, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00012258403361344538, | |
| "loss": 1.7096, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0001222689075630252, | |
| "loss": 1.7094, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00012195378151260503, | |
| "loss": 1.7103, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00012163865546218486, | |
| "loss": 1.7038, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0001213235294117647, | |
| "loss": 1.707, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00012100840336134453, | |
| "loss": 1.7094, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00012069327731092436, | |
| "loss": 1.7078, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0001203781512605042, | |
| "loss": 1.7043, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00012006302521008403, | |
| "loss": 1.7079, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011974789915966386, | |
| "loss": 1.7025, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011943277310924367, | |
| "loss": 1.7065, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0001191176470588235, | |
| "loss": 1.7047, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011880252100840335, | |
| "loss": 1.6999, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011848739495798318, | |
| "loss": 1.7018, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011817226890756301, | |
| "loss": 1.7037, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011785714285714284, | |
| "loss": 1.6972, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011754201680672267, | |
| "loss": 1.6998, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0001172268907563025, | |
| "loss": 1.7012, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011691176470588235, | |
| "loss": 1.6978, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011659663865546218, | |
| "loss": 1.7013, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011628151260504201, | |
| "loss": 1.6982, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011596638655462184, | |
| "loss": 1.699, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011565126050420167, | |
| "loss": 1.6956, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001153361344537815, | |
| "loss": 1.7124, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011502100840336134, | |
| "loss": 1.7101, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011470588235294115, | |
| "loss": 1.7024, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.000114390756302521, | |
| "loss": 1.7011, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011407563025210083, | |
| "loss": 1.7009, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011376050420168066, | |
| "loss": 1.6989, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011344537815126049, | |
| "loss": 1.7002, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011313025210084032, | |
| "loss": 1.6989, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011281512605042015, | |
| "loss": 1.6977, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001125, | |
| "loss": 1.6991, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011218487394957983, | |
| "loss": 1.6965, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011186974789915966, | |
| "loss": 1.6934, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011155462184873949, | |
| "loss": 1.6943, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011123949579831932, | |
| "loss": 1.6894, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011092436974789915, | |
| "loss": 1.6906, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011060924369747898, | |
| "loss": 1.7128, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011029411764705883, | |
| "loss": 1.6963, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010997899159663864, | |
| "loss": 1.7206, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010966386554621848, | |
| "loss": 1.7105, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00010934873949579831, | |
| "loss": 1.6997, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010903361344537814, | |
| "loss": 1.7016, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010871848739495797, | |
| "loss": 1.7026, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0001084033613445378, | |
| "loss": 1.6942, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010808823529411763, | |
| "loss": 1.6964, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010777310924369748, | |
| "loss": 1.696, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0001074579831932773, | |
| "loss": 1.694, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010714285714285714, | |
| "loss": 1.6878, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00010682773109243697, | |
| "loss": 1.6921, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001065126050420168, | |
| "loss": 1.6871, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010619747899159663, | |
| "loss": 1.6846, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010588235294117647, | |
| "loss": 1.6895, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001055672268907563, | |
| "loss": 1.6855, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010525210084033612, | |
| "loss": 1.6807, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010493697478991595, | |
| "loss": 1.6864, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010462184873949579, | |
| "loss": 1.681, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010430672268907562, | |
| "loss": 1.6913, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010399159663865545, | |
| "loss": 1.6789, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010367647058823528, | |
| "loss": 1.6939, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010336134453781512, | |
| "loss": 1.7741, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010304621848739495, | |
| "loss": 1.7619, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010273109243697478, | |
| "loss": 1.7091, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010241596638655462, | |
| "loss": 1.7116, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00010210084033613445, | |
| "loss": 1.7243, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010178571428571428, | |
| "loss": 1.7255, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010147058823529411, | |
| "loss": 1.715, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010115546218487395, | |
| "loss": 1.6924, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010084033613445378, | |
| "loss": 1.6824, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0001005252100840336, | |
| "loss": 1.6885, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00010021008403361343, | |
| "loss": 1.6848, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.989495798319326e-05, | |
| "loss": 1.6831, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.95798319327731e-05, | |
| "loss": 1.6836, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.926470588235293e-05, | |
| "loss": 1.6765, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.894957983193276e-05, | |
| "loss": 1.6775, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.86344537815126e-05, | |
| "loss": 1.6854, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.831932773109243e-05, | |
| "loss": 1.6851, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.800420168067226e-05, | |
| "loss": 1.6761, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.76890756302521e-05, | |
| "loss": 1.6693, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.737394957983193e-05, | |
| "loss": 1.6734, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.705882352941176e-05, | |
| "loss": 1.6774, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.67436974789916e-05, | |
| "loss": 1.6749, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.642857142857143e-05, | |
| "loss": 1.6789, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.611344537815126e-05, | |
| "loss": 1.6758, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.579831932773108e-05, | |
| "loss": 1.6753, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.548319327731091e-05, | |
| "loss": 1.6737, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.516806722689074e-05, | |
| "loss": 1.6723, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.485294117647057e-05, | |
| "loss": 1.6752, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.45378151260504e-05, | |
| "loss": 1.6706, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.422268907563025e-05, | |
| "loss": 1.669, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.390756302521008e-05, | |
| "loss": 1.6694, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.359243697478991e-05, | |
| "loss": 1.6677, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.327731092436974e-05, | |
| "loss": 1.6709, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.296218487394957e-05, | |
| "loss": 1.6645, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.26470588235294e-05, | |
| "loss": 1.6648, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.233193277310923e-05, | |
| "loss": 1.6717, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.201680672268908e-05, | |
| "loss": 1.6614, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.170168067226891e-05, | |
| "loss": 1.664, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.138655462184873e-05, | |
| "loss": 1.6681, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.107142857142856e-05, | |
| "loss": 1.6683, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.075630252100839e-05, | |
| "loss": 1.6639, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.044117647058822e-05, | |
| "loss": 1.6637, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.012605042016805e-05, | |
| "loss": 1.6576, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.981092436974788e-05, | |
| "loss": 1.6616, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.949579831932773e-05, | |
| "loss": 1.6604, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.918067226890756e-05, | |
| "loss": 1.6611, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.886554621848739e-05, | |
| "loss": 1.6597, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.855042016806722e-05, | |
| "loss": 1.6613, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.823529411764705e-05, | |
| "loss": 1.6588, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.792016806722688e-05, | |
| "loss": 1.6573, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.760504201680673e-05, | |
| "loss": 1.6587, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.728991596638656e-05, | |
| "loss": 1.6581, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.697478991596639e-05, | |
| "loss": 1.6531, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.66596638655462e-05, | |
| "loss": 1.6542, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.634453781512604e-05, | |
| "loss": 1.6545, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.602941176470587e-05, | |
| "loss": 1.6519, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.57142857142857e-05, | |
| "loss": 1.6557, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.539915966386553e-05, | |
| "loss": 1.6518, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.508403361344537e-05, | |
| "loss": 1.6531, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.47689075630252e-05, | |
| "loss": 1.6481, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.445378151260504e-05, | |
| "loss": 1.6475, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.413865546218487e-05, | |
| "loss": 1.6491, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.38235294117647e-05, | |
| "loss": 1.6556, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.350840336134453e-05, | |
| "loss": 1.6472, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.319327731092436e-05, | |
| "loss": 1.6477, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.28781512605042e-05, | |
| "loss": 1.6496, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.256302521008404e-05, | |
| "loss": 1.6479, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.224789915966387e-05, | |
| "loss": 1.6492, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.193277310924368e-05, | |
| "loss": 1.6443, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.161764705882352e-05, | |
| "loss": 1.6443, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.130252100840335e-05, | |
| "loss": 1.6462, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 8.098739495798318e-05, | |
| "loss": 1.6454, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 8.067226890756301e-05, | |
| "loss": 1.6416, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 8.035714285714285e-05, | |
| "loss": 1.6433, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 8.004201680672268e-05, | |
| "loss": 1.6447, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.972689075630251e-05, | |
| "loss": 1.6454, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.941176470588235e-05, | |
| "loss": 1.6402, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.909663865546218e-05, | |
| "loss": 1.642, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.878151260504201e-05, | |
| "loss": 1.6401, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.846638655462185e-05, | |
| "loss": 1.6446, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.815126050420168e-05, | |
| "loss": 1.6374, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.783613445378151e-05, | |
| "loss": 1.6425, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.752100840336135e-05, | |
| "loss": 1.6418, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.720588235294116e-05, | |
| "loss": 1.6407, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.6890756302521e-05, | |
| "loss": 1.6373, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.657563025210082e-05, | |
| "loss": 1.6397, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.626050420168066e-05, | |
| "loss": 1.6408, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.59453781512605e-05, | |
| "loss": 1.6407, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.563025210084033e-05, | |
| "loss": 1.6397, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.531512605042016e-05, | |
| "loss": 1.6406, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.5e-05, | |
| "loss": 1.6399, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.468487394957982e-05, | |
| "loss": 1.6403, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.436974789915966e-05, | |
| "loss": 1.6394, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.405462184873949e-05, | |
| "loss": 1.6377, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.373949579831932e-05, | |
| "loss": 1.6365, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.342436974789915e-05, | |
| "loss": 1.6329, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.310924369747898e-05, | |
| "loss": 1.6361, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.279411764705881e-05, | |
| "loss": 1.6325, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.247899159663865e-05, | |
| "loss": 1.6347, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.216386554621849e-05, | |
| "loss": 1.6364, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.184873949579832e-05, | |
| "loss": 1.6293, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.153361344537815e-05, | |
| "loss": 1.6306, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.121848739495798e-05, | |
| "loss": 1.6308, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.090336134453781e-05, | |
| "loss": 1.6315, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.058823529411764e-05, | |
| "loss": 1.6326, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 7.027310924369747e-05, | |
| "loss": 1.6296, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.99579831932773e-05, | |
| "loss": 1.6332, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.964285714285713e-05, | |
| "loss": 1.6337, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.932773109243698e-05, | |
| "loss": 1.6279, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.90126050420168e-05, | |
| "loss": 1.6296, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.869747899159663e-05, | |
| "loss": 1.6244, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.838235294117646e-05, | |
| "loss": 1.6323, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.80672268907563e-05, | |
| "loss": 1.63, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.775210084033613e-05, | |
| "loss": 1.6253, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.743697478991596e-05, | |
| "loss": 1.623, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.71218487394958e-05, | |
| "loss": 1.6291, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.680672268907563e-05, | |
| "loss": 1.6264, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.649159663865546e-05, | |
| "loss": 1.6278, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.617647058823529e-05, | |
| "loss": 1.6274, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.586134453781512e-05, | |
| "loss": 1.6254, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.554621848739495e-05, | |
| "loss": 1.6237, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.523109243697478e-05, | |
| "loss": 1.6232, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.491596638655461e-05, | |
| "loss": 1.6204, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.460084033613446e-05, | |
| "loss": 1.6243, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.428571428571427e-05, | |
| "loss": 1.6234, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.39705882352941e-05, | |
| "loss": 1.6237, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.365546218487394e-05, | |
| "loss": 1.6239, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.334033613445378e-05, | |
| "loss": 1.6215, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.302521008403361e-05, | |
| "loss": 1.6229, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.271008403361344e-05, | |
| "loss": 1.6197, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.239495798319327e-05, | |
| "loss": 1.6183, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.20798319327731e-05, | |
| "loss": 1.6206, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.176470588235294e-05, | |
| "loss": 1.6182, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.144957983193277e-05, | |
| "loss": 1.621, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.11344537815126e-05, | |
| "loss": 1.6204, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.081932773109243e-05, | |
| "loss": 1.6213, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.0504201680672267e-05, | |
| "loss": 1.6187, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.01890756302521e-05, | |
| "loss": 1.617, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.987394957983193e-05, | |
| "loss": 1.6145, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.955882352941175e-05, | |
| "loss": 1.6146, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.924369747899159e-05, | |
| "loss": 1.615, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.892857142857142e-05, | |
| "loss": 1.6138, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.861344537815125e-05, | |
| "loss": 1.6158, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.829831932773109e-05, | |
| "loss": 1.6149, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5.798319327731092e-05, | |
| "loss": 1.6198, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.766806722689075e-05, | |
| "loss": 1.6117, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.7352941176470576e-05, | |
| "loss": 1.6141, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.7037815126050414e-05, | |
| "loss": 1.6113, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.6722689075630245e-05, | |
| "loss": 1.6126, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.6407563025210076e-05, | |
| "loss": 1.612, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.6092436974789914e-05, | |
| "loss": 1.6067, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5.5777310924369745e-05, | |
| "loss": 1.6128, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.5462184873949576e-05, | |
| "loss": 1.6148, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.5147058823529414e-05, | |
| "loss": 1.61, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.483193277310924e-05, | |
| "loss": 1.6094, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.451680672268907e-05, | |
| "loss": 1.6108, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.42016806722689e-05, | |
| "loss": 1.6029, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.388655462184874e-05, | |
| "loss": 1.608, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.357142857142857e-05, | |
| "loss": 1.6046, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5.32563025210084e-05, | |
| "loss": 1.6096, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.294117647058824e-05, | |
| "loss": 1.6056, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.262605042016806e-05, | |
| "loss": 1.6055, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.231092436974789e-05, | |
| "loss": 1.6019, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.1995798319327724e-05, | |
| "loss": 1.603, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.168067226890756e-05, | |
| "loss": 1.6007, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.136554621848739e-05, | |
| "loss": 1.6038, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5.105042016806722e-05, | |
| "loss": 1.6017, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 5.0735294117647054e-05, | |
| "loss": 1.6051, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 5.042016806722689e-05, | |
| "loss": 1.5992, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 5.0105042016806716e-05, | |
| "loss": 1.6022, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.978991596638655e-05, | |
| "loss": 1.6038, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.947478991596638e-05, | |
| "loss": 1.6019, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.9159663865546216e-05, | |
| "loss": 1.6006, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.884453781512605e-05, | |
| "loss": 1.6046, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.852941176470588e-05, | |
| "loss": 1.6049, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.8214285714285716e-05, | |
| "loss": 1.6004, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.789915966386554e-05, | |
| "loss": 1.6016, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.758403361344537e-05, | |
| "loss": 1.6024, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.72689075630252e-05, | |
| "loss": 1.5994, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.695378151260504e-05, | |
| "loss": 1.5989, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.663865546218487e-05, | |
| "loss": 1.599, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.63235294117647e-05, | |
| "loss": 1.5968, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.600840336134454e-05, | |
| "loss": 1.5968, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.5693277310924364e-05, | |
| "loss": 1.5981, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.5378151260504195e-05, | |
| "loss": 1.5961, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.5063025210084026e-05, | |
| "loss": 1.5967, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.4747899159663864e-05, | |
| "loss": 1.5963, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.4432773109243695e-05, | |
| "loss": 1.5937, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.4117647058823526e-05, | |
| "loss": 1.5963, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.380252100840336e-05, | |
| "loss": 1.5961, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.3487394957983194e-05, | |
| "loss": 1.5955, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.317226890756302e-05, | |
| "loss": 1.5905, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.285714285714285e-05, | |
| "loss": 1.5956, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.254201680672269e-05, | |
| "loss": 1.5938, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.222689075630252e-05, | |
| "loss": 1.5939, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.191176470588235e-05, | |
| "loss": 1.5919, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.159663865546218e-05, | |
| "loss": 1.5905, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.128151260504202e-05, | |
| "loss": 1.589, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.096638655462184e-05, | |
| "loss": 1.592, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.065126050420167e-05, | |
| "loss": 1.594, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.0336134453781504e-05, | |
| "loss": 1.5908, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.002100840336134e-05, | |
| "loss": 1.5876, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.970588235294117e-05, | |
| "loss": 1.5899, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.9390756302521004e-05, | |
| "loss": 1.5912, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.907563025210084e-05, | |
| "loss": 1.5885, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.876050420168067e-05, | |
| "loss": 1.5914, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.84453781512605e-05, | |
| "loss": 1.5905, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.813025210084033e-05, | |
| "loss": 1.5885, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.7815126050420166e-05, | |
| "loss": 1.5861, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.75e-05, | |
| "loss": 1.5877, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.718487394957983e-05, | |
| "loss": 1.5846, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.686974789915966e-05, | |
| "loss": 1.5875, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.655462184873949e-05, | |
| "loss": 1.5854, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.623949579831933e-05, | |
| "loss": 1.5824, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.592436974789916e-05, | |
| "loss": 1.5847, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.560924369747899e-05, | |
| "loss": 1.5848, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.529411764705882e-05, | |
| "loss": 1.5862, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.497899159663865e-05, | |
| "loss": 1.583, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.466386554621849e-05, | |
| "loss": 1.5854, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.4348739495798313e-05, | |
| "loss": 1.584, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.403361344537815e-05, | |
| "loss": 1.5825, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.371848739495798e-05, | |
| "loss": 1.5825, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.340336134453781e-05, | |
| "loss": 1.58, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.3088235294117644e-05, | |
| "loss": 1.5808, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.2773109243697475e-05, | |
| "loss": 1.5785, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.2457983193277306e-05, | |
| "loss": 1.5825, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.214285714285714e-05, | |
| "loss": 1.5783, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.182773109243697e-05, | |
| "loss": 1.5762, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.1512605042016806e-05, | |
| "loss": 1.5771, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.119747899159664e-05, | |
| "loss": 1.5822, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.088235294117647e-05, | |
| "loss": 1.5834, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.05672268907563e-05, | |
| "loss": 1.5763, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.0252100840336133e-05, | |
| "loss": 1.5771, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9936974789915964e-05, | |
| "loss": 1.5771, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9621848739495795e-05, | |
| "loss": 1.5819, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9306722689075626e-05, | |
| "loss": 1.5786, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.899159663865546e-05, | |
| "loss": 1.579, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.8676470588235288e-05, | |
| "loss": 1.5774, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.8361344537815123e-05, | |
| "loss": 1.5733, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.8046218487394957e-05, | |
| "loss": 1.5743, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.7731092436974788e-05, | |
| "loss": 1.5715, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.741596638655462e-05, | |
| "loss": 1.5767, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.710084033613445e-05, | |
| "loss": 1.5753, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.6785714285714284e-05, | |
| "loss": 1.5726, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.647058823529412e-05, | |
| "loss": 1.5729, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.6155462184873946e-05, | |
| "loss": 1.5735, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.584033613445378e-05, | |
| "loss": 1.5719, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.552521008403361e-05, | |
| "loss": 1.5673, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.5210084033613446e-05, | |
| "loss": 1.5746, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.4894957983193274e-05, | |
| "loss": 1.5715, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.4579831932773108e-05, | |
| "loss": 1.5698, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.426470588235294e-05, | |
| "loss": 1.569, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.394957983193277e-05, | |
| "loss": 1.5693, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.36344537815126e-05, | |
| "loss": 1.5718, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.3319327731092435e-05, | |
| "loss": 1.5704, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.300420168067227e-05, | |
| "loss": 1.566, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.2689075630252097e-05, | |
| "loss": 1.5702, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.2373949579831932e-05, | |
| "loss": 1.572, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.2058823529411763e-05, | |
| "loss": 1.5689, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.1743697478991597e-05, | |
| "loss": 1.5692, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.1428571428571425e-05, | |
| "loss": 1.5679, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.111344537815126e-05, | |
| "loss": 1.5645, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.079831932773109e-05, | |
| "loss": 1.5667, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.048319327731092e-05, | |
| "loss": 1.5659, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0168067226890752e-05, | |
| "loss": 1.5628, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.9852941176470586e-05, | |
| "loss": 1.5622, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.953781512605042e-05, | |
| "loss": 1.5674, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.922268907563025e-05, | |
| "loss": 1.5645, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.8907563025210083e-05, | |
| "loss": 1.5647, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.8592436974789914e-05, | |
| "loss": 1.5641, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.8277310924369745e-05, | |
| "loss": 1.5656, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.796218487394958e-05, | |
| "loss": 1.5635, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.764705882352941e-05, | |
| "loss": 1.5612, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.7331932773109245e-05, | |
| "loss": 1.5619, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.7016806722689076e-05, | |
| "loss": 1.5643, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.6701680672268907e-05, | |
| "loss": 1.5607, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.6386554621848738e-05, | |
| "loss": 1.5642, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.607142857142857e-05, | |
| "loss": 1.5603, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.5756302521008403e-05, | |
| "loss": 1.5591, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.5441176470588234e-05, | |
| "loss": 1.5632, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.5126050420168067e-05, | |
| "loss": 1.5606, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.4810924369747898e-05, | |
| "loss": 1.5598, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.449579831932773e-05, | |
| "loss": 1.5616, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.4180672268907561e-05, | |
| "loss": 1.5609, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.3865546218487394e-05, | |
| "loss": 1.56, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.3550420168067225e-05, | |
| "loss": 1.5609, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 1.5603, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.292016806722689e-05, | |
| "loss": 1.5612, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.2605042016806723e-05, | |
| "loss": 1.5655, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.2289915966386554e-05, | |
| "loss": 1.5588, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.1974789915966385e-05, | |
| "loss": 1.561, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.1659663865546218e-05, | |
| "loss": 1.5585, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.1344537815126049e-05, | |
| "loss": 1.5569, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.1029411764705881e-05, | |
| "loss": 1.5576, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.0714285714285712e-05, | |
| "loss": 1.5551, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.0399159663865545e-05, | |
| "loss": 1.5576, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.0084033613445376e-05, | |
| "loss": 1.558, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 9.76890756302521e-06, | |
| "loss": 1.5595, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 9.453781512605041e-06, | |
| "loss": 1.5606, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 9.138655462184872e-06, | |
| "loss": 1.5629, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 8.823529411764705e-06, | |
| "loss": 1.5605, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 8.508403361344538e-06, | |
| "loss": 1.5582, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 8.193277310924369e-06, | |
| "loss": 1.5558, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 7.878151260504201e-06, | |
| "loss": 1.5562, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 7.563025210084033e-06, | |
| "loss": 1.559, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 7.247899159663865e-06, | |
| "loss": 1.5608, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 6.932773109243697e-06, | |
| "loss": 1.5595, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 6.61764705882353e-06, | |
| "loss": 1.5569, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 6.3025210084033615e-06, | |
| "loss": 1.5545, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5.9873949579831925e-06, | |
| "loss": 1.557, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5.672268907563024e-06, | |
| "loss": 1.5559, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5.357142857142856e-06, | |
| "loss": 1.5577, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5.042016806722688e-06, | |
| "loss": 1.5555, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.726890756302521e-06, | |
| "loss": 1.5554, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.4117647058823526e-06, | |
| "loss": 1.5552, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.096638655462184e-06, | |
| "loss": 1.5573, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.7815126050420167e-06, | |
| "loss": 1.5555, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.4663865546218485e-06, | |
| "loss": 1.5551, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.1512605042016808e-06, | |
| "loss": 1.5499, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.836134453781512e-06, | |
| "loss": 1.5534, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.521008403361344e-06, | |
| "loss": 1.5509, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.2058823529411763e-06, | |
| "loss": 1.5551, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.8907563025210083e-06, | |
| "loss": 1.5567, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.5756302521008404e-06, | |
| "loss": 1.5546, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.260504201680672e-06, | |
| "loss": 1.5567, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 9.453781512605042e-07, | |
| "loss": 1.5546, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 6.30252100840336e-07, | |
| "loss": 1.5575, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.15126050420168e-07, | |
| "loss": 1.5552, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0, | |
| "loss": 1.552, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "step": 500000, | |
| "total_flos": 8.422691657052488e+18, | |
| "train_loss": 1.8320032868652343, | |
| "train_runtime": 99252.1755, | |
| "train_samples_per_second": 1289.644, | |
| "train_steps_per_second": 5.038 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 500000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 10000, | |
| "total_flos": 8.422691657052488e+18, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
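
For reference, the tail of this log is a plain linear decay to zero at `max_steps`: each 500-step interval drops the learning rate by about 3.15e-07, reaching exactly 0.0 at step 500000. Below is a minimal verification sketch, not part of the original log, that cross-checks this and the summary throughput arithmetic (`train_steps_per_second * train_batch_size ≈ train_samples_per_second`, i.e. 5.038 × 256 ≈ 1289.6). The file name `trainer_state.json` is an assumption; it is the conventional name for a Trainer state file of this shape.

```python
# Hypothetical sketch: sanity-check the logged LR schedule and throughput.
import json

with open("trainer_state.json") as f:  # assumed file name
    state = json.load(f)

# Entries with a "learning_rate" key are the per-500-step training logs;
# the final entry of log_history is the run summary and has no such key.
logs = [e for e in state["log_history"] if "learning_rate" in e]
max_steps = state["max_steps"]  # 500000

# In the decay phase the schedule should satisfy
#     lr(step) = slope * (max_steps - step)
# for a single constant slope; recover it from the last nonzero-lr entry.
last = [e for e in logs if e["learning_rate"] > 0][-1]
slope = last["learning_rate"] / (max_steps - last["step"])

for e in logs[-50:]:  # the tail of the run is safely inside the decay phase
    expected = slope * (max_steps - e["step"])
    assert abs(e["learning_rate"] - expected) < 1e-12, (e["step"], expected)

# Throughput consistency: steps/s * batch size should match samples/s
# up to the rounding applied when the summary was written.
summary = state["log_history"][-1]
derived = summary["train_steps_per_second"] * state["train_batch_size"]
print(f"{derived:.1f} samples/s derived vs {summary['train_samples_per_second']} logged")
```

The loose `1e-12` tolerance absorbs float64 rounding in the serialized values while still catching any deviation from a single linear decay segment.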