| { |
| "best_metric": 4.850142955780029, |
| "best_model_checkpoint": "models/GPT2_random_function_53/checkpoint-64260", |
| "epoch": 10.0, |
| "global_step": 64260, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 8.1444, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 6.9223, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 6.5883, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 6.3346, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 6.1273, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6e-05, |
| "loss": 5.9648, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.17904218365591684, |
| "eval_loss": 5.775236129760742, |
| "eval_runtime": 2.2398, |
| "eval_samples_per_second": 527.282, |
| "eval_steps_per_second": 4.465, |
| "step": 6426 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 5.8208, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 8e-05, |
| "loss": 5.706, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 5.6124, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001, |
| "loss": 5.5385, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.815886472539625e-05, |
| "loss": 5.4703, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.631588647253964e-05, |
| "loss": 5.4131, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.20887810276890664, |
| "eval_loss": 5.287519454956055, |
| "eval_runtime": 2.2456, |
| "eval_samples_per_second": 525.918, |
| "eval_steps_per_second": 4.453, |
| "step": 12852 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.447475119793588e-05, |
| "loss": 5.3579, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.263177294507925e-05, |
| "loss": 5.2895, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.07906376704755e-05, |
| "loss": 5.2588, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.894765941761888e-05, |
| "loss": 5.2346, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 8.710652414301511e-05, |
| "loss": 5.2106, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 8.52635458901585e-05, |
| "loss": 5.1888, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 8.342241061555473e-05, |
| "loss": 5.1686, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.22407275297192422, |
| "eval_loss": 5.105263710021973, |
| "eval_runtime": 2.2414, |
| "eval_samples_per_second": 526.898, |
| "eval_steps_per_second": 4.461, |
| "step": 19278 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.157943236269813e-05, |
| "loss": 5.1083, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 7.973829708809436e-05, |
| "loss": 5.0871, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 7.789531883523775e-05, |
| "loss": 5.0803, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 7.605418356063398e-05, |
| "loss": 5.0692, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 7.421120530777738e-05, |
| "loss": 5.0601, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 7.237007003317361e-05, |
| "loss": 5.0478, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.23280684325974918, |
| "eval_loss": 5.014596939086914, |
| "eval_runtime": 2.2437, |
| "eval_samples_per_second": 526.372, |
| "eval_steps_per_second": 4.457, |
| "step": 25704 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 7.0527091780317e-05, |
| "loss": 5.0189, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 6.868595650571323e-05, |
| "loss": 4.9745, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 6.684297825285662e-05, |
| "loss": 4.9769, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 6.500184297825286e-05, |
| "loss": 4.9716, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 6.315886472539624e-05, |
| "loss": 4.9691, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 6.131772945079248e-05, |
| "loss": 4.9624, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 5.947475119793586e-05, |
| "loss": 4.9577, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.23770060071872895, |
| "eval_loss": 4.959255695343018, |
| "eval_runtime": 2.2468, |
| "eval_samples_per_second": 525.625, |
| "eval_steps_per_second": 4.451, |
| "step": 32130 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 5.763361592333211e-05, |
| "loss": 4.897, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 5.5790637670475484e-05, |
| "loss": 4.896, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 5.394950239587173e-05, |
| "loss": 4.8947, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 5.2106524143015115e-05, |
| "loss": 4.8963, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 5.0265388868411356e-05, |
| "loss": 4.8956, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 4.842241061555474e-05, |
| "loss": 4.8934, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.24108089367745206, |
| "eval_loss": 4.919989585876465, |
| "eval_runtime": 2.2539, |
| "eval_samples_per_second": 523.978, |
| "eval_steps_per_second": 4.437, |
| "step": 38556 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 4.658127534095098e-05, |
| "loss": 4.8613, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 4.4738297088094364e-05, |
| "loss": 4.8307, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 4.2897161813490605e-05, |
| "loss": 4.8343, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 4.105418356063399e-05, |
| "loss": 4.8358, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 3.921304828603022e-05, |
| "loss": 4.8388, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 3.737007003317361e-05, |
| "loss": 4.8346, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.2446545367265163, |
| "eval_loss": 4.891112804412842, |
| "eval_runtime": 2.2386, |
| "eval_samples_per_second": 527.55, |
| "eval_steps_per_second": 4.467, |
| "step": 44982 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 3.5527091780316996e-05, |
| "loss": 4.8336, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 3.368595650571324e-05, |
| "loss": 4.7732, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 3.184297825285662e-05, |
| "loss": 4.7827, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 3.0001842978252858e-05, |
| "loss": 4.7858, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 2.815886472539624e-05, |
| "loss": 4.7847, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 2.6317729450792483e-05, |
| "loss": 4.7837, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 2.4474751197935866e-05, |
| "loss": 4.7873, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.24654803416296078, |
| "eval_loss": 4.87035608291626, |
| "eval_runtime": 2.2381, |
| "eval_samples_per_second": 527.678, |
| "eval_steps_per_second": 4.468, |
| "step": 51408 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 2.2633615923332104e-05, |
| "loss": 4.7521, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 2.079063767047549e-05, |
| "loss": 4.7361, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 1.8949502395871728e-05, |
| "loss": 4.7392, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 1.710652414301511e-05, |
| "loss": 4.74, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 1.5265388868411353e-05, |
| "loss": 4.7394, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 1.3422410615554738e-05, |
| "loss": 4.7405, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.2484748678218779, |
| "eval_loss": 4.856573581695557, |
| "eval_runtime": 2.2363, |
| "eval_samples_per_second": 528.096, |
| "eval_steps_per_second": 4.472, |
| "step": 57834 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 1.1581275340950977e-05, |
| "loss": 4.7344, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 9.738297088094362e-06, |
| "loss": 4.7017, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 7.897161813490601e-06, |
| "loss": 4.7015, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 6.054183560633985e-06, |
| "loss": 4.7021, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 4.211205307777368e-06, |
| "loss": 4.7018, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 2.3700700331736087e-06, |
| "loss": 4.7016, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 5.270917803169923e-07, |
| "loss": 4.7008, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2494349510290892, |
| "eval_loss": 4.850142955780029, |
| "eval_runtime": 2.2387, |
| "eval_samples_per_second": 527.548, |
| "eval_steps_per_second": 4.467, |
| "step": 64260 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 64260, |
| "total_flos": 5.37232748544e+17, |
| "train_loss": 5.138318771519025, |
| "train_runtime": 32600.4959, |
| "train_samples_per_second": 252.274, |
| "train_steps_per_second": 1.971 |
| } |
| ], |
| "max_steps": 64260, |
| "num_train_epochs": 10, |
| "total_flos": 5.37232748544e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|