| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4793315743183817, | |
| "eval_steps": 500, | |
| "global_step": 545, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.999996182768104e-05, | |
| "loss": 1.6889, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.9999045706597178e-05, | |
| "loss": 1.5727, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.999618300852388e-05, | |
| "loss": 1.5409, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.999141245215089e-05, | |
| "loss": 1.4338, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.99847349479803e-05, | |
| "loss": 1.5329, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.9976151770471746e-05, | |
| "loss": 1.4317, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9965664557799164e-05, | |
| "loss": 1.518, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9953275311538124e-05, | |
| "loss": 1.4521, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.993898639628382e-05, | |
| "loss": 1.46, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.992280053919977e-05, | |
| "loss": 1.3848, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.99047208294973e-05, | |
| "loss": 1.3426, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9884750717845945e-05, | |
| "loss": 1.3429, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9862894015714866e-05, | |
| "loss": 1.3541, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9839154894645393e-05, | |
| "loss": 1.309, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9813537885454854e-05, | |
| "loss": 1.3096, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9786047877371823e-05, | |
| "loss": 1.2657, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.975669011710297e-05, | |
| "loss": 1.3209, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.972547020783168e-05, | |
| "loss": 1.2896, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.969239410814865e-05, | |
| "loss": 1.2634, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9657468130914626e-05, | |
| "loss": 1.2756, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.962069894205553e-05, | |
| "loss": 1.2675, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9582093559290242e-05, | |
| "loss": 1.2907, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9541659350791174e-05, | |
| "loss": 1.2533, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9499404033778016e-05, | |
| "loss": 1.2639, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9455335673044817e-05, | |
| "loss": 1.3001, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9409462679420757e-05, | |
| "loss": 1.2534, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9361793808164878e-05, | |
| "loss": 1.3083, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.931233815729505e-05, | |
| "loss": 1.2891, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9261105165851546e-05, | |
| "loss": 1.2565, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.920810461209551e-05, | |
| "loss": 1.265, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9153346611642706e-05, | |
| "loss": 1.253, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.909684161553285e-05, | |
| "loss": 1.299, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.903860040823494e-05, | |
| "loss": 1.2302, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.8978634105588963e-05, | |
| "loss": 1.2977, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.8916954152684315e-05, | |
| "loss": 1.2638, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.8853572321675428e-05, | |
| "loss": 1.2513, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.8788500709534934e-05, | |
| "loss": 1.2284, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.8721751735744873e-05, | |
| "loss": 1.234, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.8653338139926313e-05, | |
| "loss": 1.2946, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.8583272979407885e-05, | |
| "loss": 1.2923, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8511569626733673e-05, | |
| "loss": 1.3634, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8438241767110972e-05, | |
| "loss": 1.3127, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8363303395798305e-05, | |
| "loss": 1.2428, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.828676881543435e-05, | |
| "loss": 1.2556, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.8208652633308136e-05, | |
| "loss": 1.2945, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.812896975857111e-05, | |
| "loss": 1.2345, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.804773539939161e-05, | |
| "loss": 1.2595, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.7964965060052243e-05, | |
| "loss": 1.3029, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.788067453799077e-05, | |
| "loss": 1.2565, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.7794879920785015e-05, | |
| "loss": 1.2432, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.770759758308241e-05, | |
| "loss": 1.2086, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.7618844183474775e-05, | |
| "loss": 1.2351, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.752863666131885e-05, | |
| "loss": 1.2635, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7436992233503288e-05, | |
| "loss": 1.1707, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7343928391162673e-05, | |
| "loss": 1.2742, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7249462896339153e-05, | |
| "loss": 1.2808, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7153613778592435e-05, | |
| "loss": 1.2544, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.705639933155866e-05, | |
| "loss": 1.2833, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.695783810945892e-05, | |
| "loss": 1.2681, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.685794892355803e-05, | |
| "loss": 1.2044, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.6756750838574197e-05, | |
| "loss": 1.225, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.6654263169040413e-05, | |
| "loss": 1.272, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.6550505475618054e-05, | |
| "loss": 1.1861, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.644549756136358e-05, | |
| "loss": 1.2229, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6339259467948965e-05, | |
| "loss": 1.2541, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6231811471836535e-05, | |
| "loss": 1.2025, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6123174080409055e-05, | |
| "loss": 1.2166, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6013368028055724e-05, | |
| "loss": 1.249, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.5902414272214804e-05, | |
| "loss": 1.2098, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.5790333989373738e-05, | |
| "loss": 1.2635, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.567714857102743e-05, | |
| "loss": 1.2451, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.5562879619595486e-05, | |
| "loss": 1.2561, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5447548944299203e-05, | |
| "loss": 1.2773, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5331178556999094e-05, | |
| "loss": 1.2215, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5213790667993742e-05, | |
| "loss": 1.1732, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5095407681780753e-05, | |
| "loss": 1.2401, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.497605219278068e-05, | |
| "loss": 1.2051, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.4855746981024667e-05, | |
| "loss": 1.2675, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.4734515007806698e-05, | |
| "loss": 1.2195, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.4612379411301225e-05, | |
| "loss": 1.2506, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.4489363502147045e-05, | |
| "loss": 1.2441, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4365490758998268e-05, | |
| "loss": 1.2906, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.424078482404321e-05, | |
| "loss": 1.186, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4115269498492075e-05, | |
| "loss": 1.2911, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.3988968738034285e-05, | |
| "loss": 1.2135, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.3861906648266339e-05, | |
| "loss": 1.2365, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.3734107480091041e-05, | |
| "loss": 1.2167, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.3605595625089006e-05, | |
| "loss": 1.228, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3476395610863314e-05, | |
| "loss": 1.2487, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3346532096358206e-05, | |
| "loss": 1.2096, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3216029867152724e-05, | |
| "loss": 1.2412, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.308491383073014e-05, | |
| "loss": 1.2388, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.2953209011724159e-05, | |
| "loss": 1.1793, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.2820940547142773e-05, | |
| "loss": 1.241, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.2688133681570604e-05, | |
| "loss": 1.1933, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.255481376235079e-05, | |
| "loss": 1.2435, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.2421006234747202e-05, | |
| "loss": 1.2284, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2286736637088012e-05, | |
| "loss": 1.2461, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2152030595891467e-05, | |
| "loss": 1.2313, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2016913820974855e-05, | |
| "loss": 1.2388, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.1881412100547558e-05, | |
| "loss": 1.2047, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.1745551296289151e-05, | |
| "loss": 1.2011, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.1609357338413476e-05, | |
| "loss": 1.1965, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.147285622071963e-05, | |
| "loss": 1.259, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.133607399563084e-05, | |
| "loss": 1.2615, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1199036769222104e-05, | |
| "loss": 1.201, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1061770696237648e-05, | |
| "loss": 1.2445, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.0924301975099043e-05, | |
| "loss": 1.2002, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.0786656842905028e-05, | |
| "loss": 1.2267, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.0648861570423919e-05, | |
| "loss": 1.2257, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "step": 545, | |
| "total_flos": 6.944922061693256e+17, | |
| "train_loss": 1.2715962869858524, | |
| "train_runtime": 6114.717, | |
| "train_samples_per_second": 2.975, | |
| "train_steps_per_second": 0.186 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1137, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "total_flos": 6.944922061693256e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |