{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9897610921501707,
  "eval_steps": 146,
  "global_step": 438,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06825938566552901,
      "grad_norm": 180.72653528520956,
      "learning_rate": 4.942922374429224e-07,
      "loss": 3.2191,
      "step": 10
    },
    {
      "epoch": 0.13651877133105803,
      "grad_norm": 67.78875632967043,
      "learning_rate": 4.885844748858447e-07,
      "loss": 1.3006,
      "step": 20
    },
    {
      "epoch": 0.20477815699658702,
      "grad_norm": 58.498874329827956,
      "learning_rate": 4.828767123287671e-07,
      "loss": 1.0222,
      "step": 30
    },
    {
      "epoch": 0.27303754266211605,
      "grad_norm": 65.81012271745634,
      "learning_rate": 4.771689497716894e-07,
      "loss": 1.0702,
      "step": 40
    },
    {
      "epoch": 0.3412969283276451,
      "grad_norm": 67.6868122468499,
      "learning_rate": 4.7146118721461187e-07,
      "loss": 0.8233,
      "step": 50
    },
    {
      "epoch": 0.40955631399317405,
      "grad_norm": 130.6895452398653,
      "learning_rate": 4.657534246575342e-07,
      "loss": 1.0114,
      "step": 60
    },
    {
      "epoch": 0.4778156996587031,
      "grad_norm": 67.10774835755684,
      "learning_rate": 4.600456621004566e-07,
      "loss": 0.7751,
      "step": 70
    },
    {
      "epoch": 0.5460750853242321,
      "grad_norm": 222.42047329326795,
      "learning_rate": 4.54337899543379e-07,
      "loss": 0.9296,
      "step": 80
    },
    {
      "epoch": 0.6143344709897611,
      "grad_norm": 53.90173100677448,
      "learning_rate": 4.4863013698630134e-07,
      "loss": 0.8508,
      "step": 90
    },
    {
      "epoch": 0.6825938566552902,
      "grad_norm": 73.87543764538118,
      "learning_rate": 4.429223744292237e-07,
      "loss": 0.7461,
      "step": 100
    },
    {
      "epoch": 0.7508532423208191,
      "grad_norm": 51.730677807091766,
      "learning_rate": 4.372146118721461e-07,
      "loss": 0.6538,
      "step": 110
    },
    {
      "epoch": 0.8191126279863481,
      "grad_norm": 58.532049779965085,
      "learning_rate": 4.315068493150685e-07,
      "loss": 0.7099,
      "step": 120
    },
    {
      "epoch": 0.8873720136518771,
      "grad_norm": 69.81205859668461,
      "learning_rate": 4.2579908675799087e-07,
      "loss": 0.7059,
      "step": 130
    },
    {
      "epoch": 0.9556313993174061,
      "grad_norm": 48.161843074686466,
      "learning_rate": 4.200913242009132e-07,
      "loss": 0.8547,
      "step": 140
    },
    {
      "epoch": 0.9965870307167235,
      "eval_loss": 0.6474742889404297,
      "eval_runtime": 10.4556,
      "eval_samples_per_second": 24.867,
      "eval_steps_per_second": 3.156,
      "step": 146
    },
    {
      "epoch": 1.023890784982935,
      "grad_norm": 27.25525751270437,
      "learning_rate": 4.143835616438356e-07,
      "loss": 0.6076,
      "step": 150
    },
    {
      "epoch": 1.0921501706484642,
      "grad_norm": 25.863617719976116,
      "learning_rate": 4.0867579908675797e-07,
      "loss": 0.5054,
      "step": 160
    },
    {
      "epoch": 1.1604095563139931,
      "grad_norm": 52.70780610444248,
      "learning_rate": 4.029680365296804e-07,
      "loss": 0.4987,
      "step": 170
    },
    {
      "epoch": 1.2286689419795223,
      "grad_norm": 25.718837256960583,
      "learning_rate": 3.972602739726027e-07,
      "loss": 0.5697,
      "step": 180
    },
    {
      "epoch": 1.2969283276450512,
      "grad_norm": 36.98618805648008,
      "learning_rate": 3.915525114155251e-07,
      "loss": 0.615,
      "step": 190
    },
    {
      "epoch": 1.36518771331058,
      "grad_norm": 32.58402570740449,
      "learning_rate": 3.858447488584475e-07,
      "loss": 0.5849,
      "step": 200
    },
    {
      "epoch": 1.4334470989761092,
      "grad_norm": 23.37886970180941,
      "learning_rate": 3.8013698630136986e-07,
      "loss": 0.5924,
      "step": 210
    },
    {
      "epoch": 1.5017064846416384,
      "grad_norm": 27.73682160230221,
      "learning_rate": 3.744292237442922e-07,
      "loss": 0.5095,
      "step": 220
    },
    {
      "epoch": 1.5699658703071673,
      "grad_norm": 58.838640484256175,
      "learning_rate": 3.687214611872146e-07,
      "loss": 0.4661,
      "step": 230
    },
    {
      "epoch": 1.6382252559726962,
      "grad_norm": 29.677456691329837,
      "learning_rate": 3.6301369863013697e-07,
      "loss": 0.5448,
      "step": 240
    },
    {
      "epoch": 1.7064846416382253,
      "grad_norm": 83.73694041166883,
      "learning_rate": 3.573059360730594e-07,
      "loss": 0.4715,
      "step": 250
    },
    {
      "epoch": 1.7747440273037542,
      "grad_norm": 72.88041601319203,
      "learning_rate": 3.515981735159817e-07,
      "loss": 0.4408,
      "step": 260
    },
    {
      "epoch": 1.8430034129692832,
      "grad_norm": 48.86805911410425,
      "learning_rate": 3.4589041095890407e-07,
      "loss": 0.5299,
      "step": 270
    },
    {
      "epoch": 1.9112627986348123,
      "grad_norm": 56.39227585073634,
      "learning_rate": 3.401826484018265e-07,
      "loss": 0.462,
      "step": 280
    },
    {
      "epoch": 1.9795221843003414,
      "grad_norm": 47.10405902113264,
      "learning_rate": 3.3447488584474886e-07,
      "loss": 0.5135,
      "step": 290
    },
    {
      "epoch": 1.993174061433447,
      "eval_loss": 0.6217488646507263,
      "eval_runtime": 10.0993,
      "eval_samples_per_second": 25.744,
      "eval_steps_per_second": 3.268,
      "step": 292
    },
    {
      "epoch": 2.04778156996587,
      "grad_norm": 47.53720316702887,
      "learning_rate": 3.287671232876712e-07,
      "loss": 0.4304,
      "step": 300
    },
    {
      "epoch": 2.1160409556313993,
      "grad_norm": 29.27611194113683,
      "learning_rate": 3.230593607305936e-07,
      "loss": 0.4024,
      "step": 310
    },
    {
      "epoch": 2.1843003412969284,
      "grad_norm": 22.633855883872762,
      "learning_rate": 3.1735159817351596e-07,
      "loss": 0.3316,
      "step": 320
    },
    {
      "epoch": 2.252559726962457,
      "grad_norm": 33.60546914801217,
      "learning_rate": 3.116438356164384e-07,
      "loss": 0.3474,
      "step": 330
    },
    {
      "epoch": 2.3208191126279862,
      "grad_norm": 38.727812421442835,
      "learning_rate": 3.059360730593607e-07,
      "loss": 0.3546,
      "step": 340
    },
    {
      "epoch": 2.3890784982935154,
      "grad_norm": 41.67978939810039,
      "learning_rate": 3.0022831050228307e-07,
      "loss": 0.2542,
      "step": 350
    },
    {
      "epoch": 2.4573378839590445,
      "grad_norm": 77.57463923271911,
      "learning_rate": 2.945205479452055e-07,
      "loss": 0.2766,
      "step": 360
    },
    {
      "epoch": 2.5255972696245736,
      "grad_norm": 59.43311669158369,
      "learning_rate": 2.8881278538812786e-07,
      "loss": 0.44,
      "step": 370
    },
    {
      "epoch": 2.5938566552901023,
      "grad_norm": 22.434362950673165,
      "learning_rate": 2.831050228310502e-07,
      "loss": 0.2935,
      "step": 380
    },
    {
      "epoch": 2.6621160409556315,
      "grad_norm": 30.626819579935223,
      "learning_rate": 2.773972602739726e-07,
      "loss": 0.3208,
      "step": 390
    },
    {
      "epoch": 2.73037542662116,
      "grad_norm": 64.9106394772347,
      "learning_rate": 2.7168949771689496e-07,
      "loss": 0.41,
      "step": 400
    },
    {
      "epoch": 2.7986348122866893,
      "grad_norm": 112.81858040393827,
      "learning_rate": 2.659817351598174e-07,
      "loss": 0.4234,
      "step": 410
    },
    {
      "epoch": 2.8668941979522184,
      "grad_norm": 25.381208288443705,
      "learning_rate": 2.602739726027397e-07,
      "loss": 0.3328,
      "step": 420
    },
    {
      "epoch": 2.9351535836177476,
      "grad_norm": 45.82561826818154,
      "learning_rate": 2.5456621004566206e-07,
      "loss": 0.4019,
      "step": 430
    },
    {
      "epoch": 2.9897610921501707,
      "eval_loss": 0.64452064037323,
      "eval_runtime": 10.0372,
      "eval_samples_per_second": 25.904,
      "eval_steps_per_second": 3.288,
      "step": 438
    }
  ],
  "logging_steps": 10,
  "max_steps": 876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 146,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6392788746240.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}