Instructions to use diffusion-reasoning/wll_SFT_NP_math-2500 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use diffusion-reasoning/wll_SFT_NP_math-2500 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("GSAI-ML/LLaDA-8B-Instruct")
model = PeftModel.from_pretrained(base_model, "diffusion-reasoning/wll_SFT_NP_math-2500")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.2222222222222222, | |
| "eval_steps": 1000, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 246.3125, | |
| "epoch": 8.888888888888889e-05, | |
| "grad_norm": 60.320960998535156, | |
| "learning_rate": 2.5e-07, | |
| "loss": -10.229, | |
| "reward": 1.7395833730697632, | |
| "reward_std": 0.6432403922080994, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.65625, | |
| "rewards/correctness_reward_func_math": 1.0833333432674408, | |
| "step": 1, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.00017777777777777779, | |
| "grad_norm": 71.0031509399414, | |
| "learning_rate": 5e-07, | |
| "loss": -9.5625, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0002666666666666667, | |
| "grad_norm": 61.95022964477539, | |
| "learning_rate": 7.5e-07, | |
| "loss": -16.2291, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00035555555555555557, | |
| "grad_norm": 67.81867980957031, | |
| "learning_rate": 1e-06, | |
| "loss": -11.0016, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00044444444444444447, | |
| "grad_norm": 57.108917236328125, | |
| "learning_rate": 1.25e-06, | |
| "loss": -6.1658, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0005333333333333334, | |
| "grad_norm": 72.85011291503906, | |
| "learning_rate": 1.5e-06, | |
| "loss": -4.0145, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0006222222222222223, | |
| "grad_norm": 59.103431701660156, | |
| "learning_rate": 1.7500000000000002e-06, | |
| "loss": -9.9488, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0007111111111111111, | |
| "grad_norm": 73.94007873535156, | |
| "learning_rate": 2e-06, | |
| "loss": -9.1809, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0008, | |
| "grad_norm": 62.28184509277344, | |
| "learning_rate": 2.25e-06, | |
| "loss": -16.4311, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0008888888888888889, | |
| "grad_norm": 68.46251678466797, | |
| "learning_rate": 2.5e-06, | |
| "loss": -11.402, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0009777777777777777, | |
| "grad_norm": 59.184749603271484, | |
| "learning_rate": 2.75e-06, | |
| "loss": -6.1384, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0010666666666666667, | |
| "grad_norm": 71.60365295410156, | |
| "learning_rate": 3e-06, | |
| "loss": -3.932, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 249.62500762939453, | |
| "epoch": 0.0011555555555555555, | |
| "grad_norm": 61.79197311401367, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1748, | |
| "reward": 0.9791666865348816, | |
| "reward_std": 0.4510806053876877, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6458333134651184, | |
| "rewards/correctness_reward_func_math": 0.3333333283662796, | |
| "step": 13, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.0012444444444444445, | |
| "grad_norm": 55.82655334472656, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8107, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0013333333333333333, | |
| "grad_norm": 58.3712158203125, | |
| "learning_rate": 3e-06, | |
| "loss": 4.085, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0014222222222222223, | |
| "grad_norm": 73.97306823730469, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8915, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.001511111111111111, | |
| "grad_norm": 51.51576232910156, | |
| "learning_rate": 3e-06, | |
| "loss": -0.161, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0016, | |
| "grad_norm": 72.59639739990234, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0369, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0016888888888888889, | |
| "grad_norm": 48.86510467529297, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3437, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0017777777777777779, | |
| "grad_norm": 55.3180046081543, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3585, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0018666666666666666, | |
| "grad_norm": 54.75101089477539, | |
| "learning_rate": 3e-06, | |
| "loss": 3.9965, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0019555555555555554, | |
| "grad_norm": 75.27330017089844, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9733, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0020444444444444447, | |
| "grad_norm": 51.991214752197266, | |
| "learning_rate": 3e-06, | |
| "loss": -0.3033, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0021333333333333334, | |
| "grad_norm": 54.72827911376953, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5278, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 226.8125, | |
| "epoch": 0.0022222222222222222, | |
| "grad_norm": 58.25025939941406, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7768, | |
| "reward": 1.6041666865348816, | |
| "reward_std": 0.6311438381671906, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6458333432674408, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 25, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.002311111111111111, | |
| "grad_norm": 68.62551879882812, | |
| "learning_rate": 3e-06, | |
| "loss": -4.0906, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0024, | |
| "grad_norm": 65.11053466796875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6172, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.002488888888888889, | |
| "grad_norm": 76.28429412841797, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9209, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.002577777777777778, | |
| "grad_norm": 62.037696838378906, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4414, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0026666666666666666, | |
| "grad_norm": 58.92220687866211, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2836, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0027555555555555554, | |
| "grad_norm": 57.03800582885742, | |
| "learning_rate": 3e-06, | |
| "loss": -7.1747, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0028444444444444446, | |
| "grad_norm": 71.39422607421875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5251, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0029333333333333334, | |
| "grad_norm": 130.19813537597656, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5744, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.003022222222222222, | |
| "grad_norm": 76.09828186035156, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9552, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.003111111111111111, | |
| "grad_norm": 63.77288055419922, | |
| "learning_rate": 3e-06, | |
| "loss": -3.6391, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0032, | |
| "grad_norm": 58.53509521484375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9415, | |
| "step": 36 | |
| }, | |
| { | |
| "completion_length": 245.14583587646484, | |
| "epoch": 0.003288888888888889, | |
| "grad_norm": 53.06296920776367, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1798, | |
| "reward": 0.9375000298023224, | |
| "reward_std": 0.3340114951133728, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6041666567325592, | |
| "rewards/correctness_reward_func_math": 0.3333333246409893, | |
| "step": 37, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.0033777777777777777, | |
| "grad_norm": 78.04679870605469, | |
| "learning_rate": 3e-06, | |
| "loss": 13.2393, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0034666666666666665, | |
| "grad_norm": 64.38521575927734, | |
| "learning_rate": 3e-06, | |
| "loss": 11.5406, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0035555555555555557, | |
| "grad_norm": 56.69493865966797, | |
| "learning_rate": 3e-06, | |
| "loss": 11.0537, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0036444444444444445, | |
| "grad_norm": 59.67893600463867, | |
| "learning_rate": 3e-06, | |
| "loss": 12.2084, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0037333333333333333, | |
| "grad_norm": 44.71684646606445, | |
| "learning_rate": 3e-06, | |
| "loss": 14.915, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.003822222222222222, | |
| "grad_norm": 53.003570556640625, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1581, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.003911111111111111, | |
| "grad_norm": 86.505615234375, | |
| "learning_rate": 3e-06, | |
| "loss": 13.0403, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 73.7258529663086, | |
| "learning_rate": 3e-06, | |
| "loss": 11.1962, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.004088888888888889, | |
| "grad_norm": 92.57136535644531, | |
| "learning_rate": 3e-06, | |
| "loss": 10.6526, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.004177777777777778, | |
| "grad_norm": 63.43205642700195, | |
| "learning_rate": 3e-06, | |
| "loss": 11.8212, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.004266666666666667, | |
| "grad_norm": 44.73876953125, | |
| "learning_rate": 3e-06, | |
| "loss": 14.3455, | |
| "step": 48 | |
| }, | |
| { | |
| "completion_length": 236.37500762939453, | |
| "epoch": 0.004355555555555555, | |
| "grad_norm": 42.604164123535156, | |
| "learning_rate": 3e-06, | |
| "loss": -28.8562, | |
| "reward": 1.6145833730697632, | |
| "reward_std": 0.3440491110086441, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.65625, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 49, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.0044444444444444444, | |
| "grad_norm": 55.280696868896484, | |
| "learning_rate": 3e-06, | |
| "loss": -29.8671, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.004533333333333334, | |
| "grad_norm": 53.84416198730469, | |
| "learning_rate": 3e-06, | |
| "loss": -28.6148, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.004622222222222222, | |
| "grad_norm": 48.8647575378418, | |
| "learning_rate": 3e-06, | |
| "loss": -28.0853, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.004711111111111111, | |
| "grad_norm": 65.01343536376953, | |
| "learning_rate": 3e-06, | |
| "loss": -26.2356, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0048, | |
| "grad_norm": 64.81402587890625, | |
| "learning_rate": 3e-06, | |
| "loss": -30.8205, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.004888888888888889, | |
| "grad_norm": 44.85778045654297, | |
| "learning_rate": 3e-06, | |
| "loss": -28.6691, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.004977777777777778, | |
| "grad_norm": 45.61606216430664, | |
| "learning_rate": 3e-06, | |
| "loss": -30.0595, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.005066666666666666, | |
| "grad_norm": 49.3116455078125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.8315, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.005155555555555556, | |
| "grad_norm": 45.42935562133789, | |
| "learning_rate": 3e-06, | |
| "loss": -28.1493, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.005244444444444445, | |
| "grad_norm": 52.282257080078125, | |
| "learning_rate": 3e-06, | |
| "loss": -26.8023, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.005333333333333333, | |
| "grad_norm": 61.042945861816406, | |
| "learning_rate": 3e-06, | |
| "loss": -30.9091, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 250.8541717529297, | |
| "epoch": 0.005422222222222222, | |
| "grad_norm": 56.66669464111328, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2644, | |
| "reward": 1.0104166865348816, | |
| "reward_std": 0.3859569579362869, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5104166716337204, | |
| "rewards/correctness_reward_func_math": 0.4999999850988388, | |
| "step": 61, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.005511111111111111, | |
| "grad_norm": 46.742279052734375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.921, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0056, | |
| "grad_norm": 64.74068450927734, | |
| "learning_rate": 3e-06, | |
| "loss": -1.0677, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.005688888888888889, | |
| "grad_norm": 53.72319412231445, | |
| "learning_rate": 3e-06, | |
| "loss": 0.7498, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0057777777777777775, | |
| "grad_norm": 51.9224739074707, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7073, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.005866666666666667, | |
| "grad_norm": 49.95579528808594, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7011, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.005955555555555556, | |
| "grad_norm": 54.09262466430664, | |
| "learning_rate": 3e-06, | |
| "loss": -4.882, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.006044444444444444, | |
| "grad_norm": 51.433746337890625, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6496, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0061333333333333335, | |
| "grad_norm": 48.16537094116211, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5035, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.006222222222222222, | |
| "grad_norm": 55.34268569946289, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0384, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006311111111111111, | |
| "grad_norm": 45.631813049316406, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8713, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0064, | |
| "grad_norm": 48.471473693847656, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9618, | |
| "step": 72 | |
| }, | |
| { | |
| "completion_length": 231.14583587646484, | |
| "epoch": 0.006488888888888889, | |
| "grad_norm": 91.5987777709961, | |
| "learning_rate": 3e-06, | |
| "loss": 59.5993, | |
| "reward": 1.1145833730697632, | |
| "reward_std": 0.4806128740310669, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 73, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.006577777777777778, | |
| "grad_norm": 83.08058166503906, | |
| "learning_rate": 3e-06, | |
| "loss": 67.2965, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.006666666666666667, | |
| "grad_norm": 65.38250732421875, | |
| "learning_rate": 3e-06, | |
| "loss": 60.7344, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0067555555555555554, | |
| "grad_norm": 66.78120422363281, | |
| "learning_rate": 3e-06, | |
| "loss": 63.4533, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.006844444444444445, | |
| "grad_norm": 62.675838470458984, | |
| "learning_rate": 3e-06, | |
| "loss": 54.1729, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.006933333333333333, | |
| "grad_norm": 63.28793716430664, | |
| "learning_rate": 3e-06, | |
| "loss": 61.2604, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.007022222222222222, | |
| "grad_norm": 75.33735656738281, | |
| "learning_rate": 3e-06, | |
| "loss": 59.0054, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.0071111111111111115, | |
| "grad_norm": 86.537109375, | |
| "learning_rate": 3e-06, | |
| "loss": 66.586, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0072, | |
| "grad_norm": 66.0783462524414, | |
| "learning_rate": 3e-06, | |
| "loss": 59.9151, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.007288888888888889, | |
| "grad_norm": 66.10869598388672, | |
| "learning_rate": 3e-06, | |
| "loss": 62.0304, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.007377777777777777, | |
| "grad_norm": 58.026912689208984, | |
| "learning_rate": 3e-06, | |
| "loss": 53.3198, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.007466666666666667, | |
| "grad_norm": 59.65370559692383, | |
| "learning_rate": 3e-06, | |
| "loss": 60.237, | |
| "step": 84 | |
| }, | |
| { | |
| "completion_length": 246.70833587646484, | |
| "epoch": 0.007555555555555556, | |
| "grad_norm": 38.2843017578125, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4492, | |
| "reward": 0.9270833730697632, | |
| "reward_std": 0.2587623968720436, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6354166567325592, | |
| "rewards/correctness_reward_func_math": 0.2916666679084301, | |
| "step": 85, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.007644444444444444, | |
| "grad_norm": 42.626834869384766, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0457, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.007733333333333333, | |
| "grad_norm": 31.817684173583984, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2054, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.007822222222222222, | |
| "grad_norm": 41.712833404541016, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6522, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.007911111111111112, | |
| "grad_norm": 33.385929107666016, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3715, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 43.1032829284668, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5502, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008088888888888889, | |
| "grad_norm": 36.241458892822266, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5684, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.008177777777777779, | |
| "grad_norm": 41.06986618041992, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2263, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.008266666666666667, | |
| "grad_norm": 31.25284767150879, | |
| "learning_rate": 3e-06, | |
| "loss": -0.3487, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.008355555555555555, | |
| "grad_norm": 36.958518981933594, | |
| "learning_rate": 3e-06, | |
| "loss": 0.1995, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.008444444444444444, | |
| "grad_norm": 34.949676513671875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9378, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.008533333333333334, | |
| "grad_norm": 36.523372650146484, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9469, | |
| "step": 96 | |
| }, | |
| { | |
| "completion_length": 246.08333587646484, | |
| "epoch": 0.008622222222222222, | |
| "grad_norm": 59.17626953125, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6896, | |
| "reward": 1.3333333730697632, | |
| "reward_std": 0.5695068836212158, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5833333432674408, | |
| "rewards/correctness_reward_func_math": 0.7500000149011612, | |
| "step": 97, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.00871111111111111, | |
| "grad_norm": 71.58135223388672, | |
| "learning_rate": 3e-06, | |
| "loss": 1.0704, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0088, | |
| "grad_norm": 65.36974334716797, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7445, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.008888888888888889, | |
| "grad_norm": 60.50218200683594, | |
| "learning_rate": 3e-06, | |
| "loss": 3.606, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.008977777777777777, | |
| "grad_norm": 61.99585723876953, | |
| "learning_rate": 3e-06, | |
| "loss": -1.4435, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.009066666666666667, | |
| "grad_norm": 106.92288970947266, | |
| "learning_rate": 3e-06, | |
| "loss": -3.972, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.009155555555555556, | |
| "grad_norm": 58.85340118408203, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5567, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.009244444444444444, | |
| "grad_norm": 70.97467041015625, | |
| "learning_rate": 3e-06, | |
| "loss": -0.3592, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.009333333333333334, | |
| "grad_norm": 62.310516357421875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.0897, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.009422222222222222, | |
| "grad_norm": 60.98678207397461, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9803, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.00951111111111111, | |
| "grad_norm": 60.23484420776367, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2749, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.0096, | |
| "grad_norm": 58.0914192199707, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9462, | |
| "step": 108 | |
| }, | |
| { | |
| "completion_length": 249.06250762939453, | |
| "epoch": 0.00968888888888889, | |
| "grad_norm": 65.09230041503906, | |
| "learning_rate": 3e-06, | |
| "loss": -11.6324, | |
| "reward": 1.125, | |
| "reward_std": 0.5275504291057587, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5833333432674408, | |
| "rewards/correctness_reward_func_math": 0.5416666716337204, | |
| "step": 109, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.009777777777777778, | |
| "grad_norm": 58.50445556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.2664, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009866666666666666, | |
| "grad_norm": 53.459251403808594, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2192, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.009955555555555556, | |
| "grad_norm": 60.34041213989258, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9971, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.010044444444444444, | |
| "grad_norm": 61.72711944580078, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4686, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.010133333333333333, | |
| "grad_norm": 96.1756591796875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4161, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.010222222222222223, | |
| "grad_norm": 61.3508415222168, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3763, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.010311111111111111, | |
| "grad_norm": 55.424896240234375, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8949, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0104, | |
| "grad_norm": 56.08291244506836, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8472, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.01048888888888889, | |
| "grad_norm": 73.18891906738281, | |
| "learning_rate": 3e-06, | |
| "loss": -8.0281, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.010577777777777778, | |
| "grad_norm": 64.47604370117188, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3444, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.010666666666666666, | |
| "grad_norm": 96.0721664428711, | |
| "learning_rate": 3e-06, | |
| "loss": -8.5737, | |
| "step": 120 | |
| }, | |
| { | |
| "completion_length": 253.0, | |
| "epoch": 0.010755555555555556, | |
| "grad_norm": 60.78779983520508, | |
| "learning_rate": 3e-06, | |
| "loss": -1.605, | |
| "reward": 1.1145833730697632, | |
| "reward_std": 0.4272044152021408, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.4999999850988388, | |
| "step": 121, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.010844444444444445, | |
| "grad_norm": 49.34260177612305, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2147, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.010933333333333333, | |
| "grad_norm": 53.38318634033203, | |
| "learning_rate": 3e-06, | |
| "loss": -7.1697, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.011022222222222221, | |
| "grad_norm": 84.88465881347656, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7029, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": 50.966583251953125, | |
| "learning_rate": 3e-06, | |
| "loss": -5.2481, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0112, | |
| "grad_norm": 64.3619155883789, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9545, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.011288888888888888, | |
| "grad_norm": 60.359500885009766, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8244, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.011377777777777778, | |
| "grad_norm": 51.08177947998047, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8292, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.011466666666666667, | |
| "grad_norm": 53.191165924072266, | |
| "learning_rate": 3e-06, | |
| "loss": -7.7867, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.011555555555555555, | |
| "grad_norm": 87.42491912841797, | |
| "learning_rate": 3e-06, | |
| "loss": 3.92, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011644444444444445, | |
| "grad_norm": 49.99729537963867, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9859, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.011733333333333333, | |
| "grad_norm": 45.14487075805664, | |
| "learning_rate": 3e-06, | |
| "loss": -6.6928, | |
| "step": 132 | |
| }, | |
| { | |
| "completion_length": 239.1666717529297, | |
| "epoch": 0.011822222222222222, | |
| "grad_norm": 68.43509674072266, | |
| "learning_rate": 3e-06, | |
| "loss": 45.5812, | |
| "reward": 1.1666666865348816, | |
| "reward_std": 0.47104020416736603, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.5416666679084301, | |
| "step": 133, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.011911111111111112, | |
| "grad_norm": 62.809059143066406, | |
| "learning_rate": 3e-06, | |
| "loss": 47.1017, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 61.8614387512207, | |
| "learning_rate": 3e-06, | |
| "loss": 41.2505, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.012088888888888889, | |
| "grad_norm": 65.46350860595703, | |
| "learning_rate": 3e-06, | |
| "loss": 44.191, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.012177777777777777, | |
| "grad_norm": 59.1669807434082, | |
| "learning_rate": 3e-06, | |
| "loss": 36.6508, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.012266666666666667, | |
| "grad_norm": 55.44610595703125, | |
| "learning_rate": 3e-06, | |
| "loss": 41.1041, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.012355555555555555, | |
| "grad_norm": 61.210411071777344, | |
| "learning_rate": 3e-06, | |
| "loss": 44.6212, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.012444444444444444, | |
| "grad_norm": 64.934326171875, | |
| "learning_rate": 3e-06, | |
| "loss": 46.5024, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.012533333333333334, | |
| "grad_norm": 67.7354507446289, | |
| "learning_rate": 3e-06, | |
| "loss": 40.309, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.012622222222222222, | |
| "grad_norm": 69.55413055419922, | |
| "learning_rate": 3e-06, | |
| "loss": 42.8161, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.01271111111111111, | |
| "grad_norm": 59.040592193603516, | |
| "learning_rate": 3e-06, | |
| "loss": 35.3869, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.0128, | |
| "grad_norm": 56.21048355102539, | |
| "learning_rate": 3e-06, | |
| "loss": 39.9197, | |
| "step": 144 | |
| }, | |
| { | |
| "completion_length": 237.37500762939453, | |
| "epoch": 0.012888888888888889, | |
| "grad_norm": 67.88895416259766, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4223, | |
| "reward": 0.8958333730697632, | |
| "reward_std": 0.44294705986976624, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625000149011612, | |
| "rewards/correctness_reward_func_math": 0.3333333358168602, | |
| "step": 145, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.012977777777777777, | |
| "grad_norm": 55.02178955078125, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3881, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.013066666666666667, | |
| "grad_norm": 103.78085327148438, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7028, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.013155555555555556, | |
| "grad_norm": 62.1268196105957, | |
| "learning_rate": 3e-06, | |
| "loss": -19.048, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.013244444444444444, | |
| "grad_norm": 57.99726486206055, | |
| "learning_rate": 3e-06, | |
| "loss": -19.346, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.013333333333333334, | |
| "grad_norm": 58.639549255371094, | |
| "learning_rate": 3e-06, | |
| "loss": -25.0216, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.013422222222222223, | |
| "grad_norm": 75.58393859863281, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7941, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.013511111111111111, | |
| "grad_norm": 54.83882522583008, | |
| "learning_rate": 3e-06, | |
| "loss": -27.6056, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.0136, | |
| "grad_norm": 70.61170196533203, | |
| "learning_rate": 3e-06, | |
| "loss": -21.99, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.01368888888888889, | |
| "grad_norm": 68.1909408569336, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2119, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.013777777777777778, | |
| "grad_norm": 68.70491027832031, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2249, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.013866666666666666, | |
| "grad_norm": 55.29183578491211, | |
| "learning_rate": 3e-06, | |
| "loss": -25.9634, | |
| "step": 156 | |
| }, | |
| { | |
| "completion_length": 250.6666717529297, | |
| "epoch": 0.013955555555555556, | |
| "grad_norm": 70.28712463378906, | |
| "learning_rate": 3e-06, | |
| "loss": -13.869, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.6823203265666962, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.8333333283662796, | |
| "step": 157, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.014044444444444444, | |
| "grad_norm": 60.210201263427734, | |
| "learning_rate": 3e-06, | |
| "loss": -24.2593, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.014133333333333333, | |
| "grad_norm": 77.62222290039062, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4696, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.014222222222222223, | |
| "grad_norm": 70.80023193359375, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8617, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.014311111111111111, | |
| "grad_norm": 64.7750244140625, | |
| "learning_rate": 3e-06, | |
| "loss": -14.8635, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.0144, | |
| "grad_norm": 77.83097076416016, | |
| "learning_rate": 3e-06, | |
| "loss": -7.727, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.01448888888888889, | |
| "grad_norm": 79.27497100830078, | |
| "learning_rate": 3e-06, | |
| "loss": -14.3689, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.014577777777777778, | |
| "grad_norm": 78.7293472290039, | |
| "learning_rate": 3e-06, | |
| "loss": -24.9423, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.014666666666666666, | |
| "grad_norm": 70.24745178222656, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4543, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.014755555555555555, | |
| "grad_norm": 75.36212158203125, | |
| "learning_rate": 3e-06, | |
| "loss": -26.7206, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.014844444444444445, | |
| "grad_norm": 65.05477142333984, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0018, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.014933333333333333, | |
| "grad_norm": 78.83174133300781, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9618, | |
| "step": 168 | |
| }, | |
| { | |
| "completion_length": 236.2916717529297, | |
| "epoch": 0.015022222222222222, | |
| "grad_norm": 82.83967590332031, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5024, | |
| "reward": 1.1145833730697632, | |
| "reward_std": 0.3740755543112755, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833134651184, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 169, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.015111111111111112, | |
| "grad_norm": 51.84051513671875, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7446, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0152, | |
| "grad_norm": 63.607723236083984, | |
| "learning_rate": 3e-06, | |
| "loss": 2.7771, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.015288888888888888, | |
| "grad_norm": 52.88029479980469, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1945, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.015377777777777778, | |
| "grad_norm": 68.90487670898438, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8609, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.015466666666666667, | |
| "grad_norm": 57.66716766357422, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5103, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.015555555555555555, | |
| "grad_norm": 69.48858642578125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.89, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.015644444444444443, | |
| "grad_norm": 51.13008117675781, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8779, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.015733333333333332, | |
| "grad_norm": 61.48530578613281, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0727, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.015822222222222224, | |
| "grad_norm": 55.415924072265625, | |
| "learning_rate": 3e-06, | |
| "loss": 7.6559, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.015911111111111112, | |
| "grad_norm": 65.15290069580078, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6101, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 52.03913879394531, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3899, | |
| "step": 180 | |
| }, | |
| { | |
| "completion_length": 250.14583587646484, | |
| "epoch": 0.01608888888888889, | |
| "grad_norm": 63.963829040527344, | |
| "learning_rate": 3e-06, | |
| "loss": -8.994, | |
| "reward": 0.885416716337204, | |
| "reward_std": 0.3302172925323248, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.59375, | |
| "rewards/correctness_reward_func_math": 0.2916666567325592, | |
| "step": 181, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.016177777777777777, | |
| "grad_norm": 48.50006866455078, | |
| "learning_rate": 3e-06, | |
| "loss": -0.9428, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.016266666666666665, | |
| "grad_norm": 58.21607971191406, | |
| "learning_rate": 3e-06, | |
| "loss": -8.2051, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.016355555555555557, | |
| "grad_norm": 76.80998992919922, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6849, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.016444444444444446, | |
| "grad_norm": 48.460941314697266, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4021, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.016533333333333334, | |
| "grad_norm": 55.28091049194336, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4156, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.016622222222222222, | |
| "grad_norm": 65.21077728271484, | |
| "learning_rate": 3e-06, | |
| "loss": -9.4278, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.01671111111111111, | |
| "grad_norm": 50.71424865722656, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0682, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.0168, | |
| "grad_norm": 58.02372360229492, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1068, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.016888888888888887, | |
| "grad_norm": 61.12031555175781, | |
| "learning_rate": 3e-06, | |
| "loss": 0.679, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.01697777777777778, | |
| "grad_norm": 51.7930908203125, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3214, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.017066666666666667, | |
| "grad_norm": 48.15507507324219, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8545, | |
| "step": 192 | |
| }, | |
| { | |
| "completion_length": 248.1666717529297, | |
| "epoch": 0.017155555555555556, | |
| "grad_norm": 62.317527770996094, | |
| "learning_rate": 3e-06, | |
| "loss": 13.8287, | |
| "reward": 1.0729167461395264, | |
| "reward_std": 0.348264142870903, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5729166865348816, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 193, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.017244444444444444, | |
| "grad_norm": 43.339691162109375, | |
| "learning_rate": 3e-06, | |
| "loss": 17.523, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.017333333333333333, | |
| "grad_norm": 48.14270782470703, | |
| "learning_rate": 3e-06, | |
| "loss": 15.9481, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.01742222222222222, | |
| "grad_norm": 43.32905960083008, | |
| "learning_rate": 3e-06, | |
| "loss": 14.7259, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.017511111111111113, | |
| "grad_norm": 45.01740264892578, | |
| "learning_rate": 3e-06, | |
| "loss": 13.8658, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.0176, | |
| "grad_norm": 43.2428092956543, | |
| "learning_rate": 3e-06, | |
| "loss": 20.4664, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.01768888888888889, | |
| "grad_norm": 56.058616638183594, | |
| "learning_rate": 3e-06, | |
| "loss": 13.1828, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.017777777777777778, | |
| "grad_norm": 46.968666076660156, | |
| "learning_rate": 3e-06, | |
| "loss": 16.8591, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.017866666666666666, | |
| "grad_norm": 45.98298263549805, | |
| "learning_rate": 3e-06, | |
| "loss": 15.5154, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.017955555555555554, | |
| "grad_norm": 43.91643142700195, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2328, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.018044444444444443, | |
| "grad_norm": 44.83538055419922, | |
| "learning_rate": 3e-06, | |
| "loss": 13.1929, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.018133333333333335, | |
| "grad_norm": 42.83240509033203, | |
| "learning_rate": 3e-06, | |
| "loss": 19.8936, | |
| "step": 204 | |
| }, | |
| { | |
| "completion_length": 249.02083587646484, | |
| "epoch": 0.018222222222222223, | |
| "grad_norm": 119.68338775634766, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5616, | |
| "reward": 0.8229166865348816, | |
| "reward_std": 0.28067073225975037, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.65625, | |
| "rewards/correctness_reward_func_math": 0.1666666716337204, | |
| "step": 205, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.01831111111111111, | |
| "grad_norm": 40.65678405761719, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7784, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.0184, | |
| "grad_norm": 51.84949493408203, | |
| "learning_rate": 3e-06, | |
| "loss": 2.132, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.018488888888888888, | |
| "grad_norm": 40.80442428588867, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7568, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.018577777777777776, | |
| "grad_norm": 51.88225555419922, | |
| "learning_rate": 3e-06, | |
| "loss": -2.808, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.018666666666666668, | |
| "grad_norm": 57.230106353759766, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6958, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.018755555555555557, | |
| "grad_norm": 65.36343383789062, | |
| "learning_rate": 3e-06, | |
| "loss": -5.016, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.018844444444444445, | |
| "grad_norm": 42.36751937866211, | |
| "learning_rate": 3e-06, | |
| "loss": 3.2604, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.018933333333333333, | |
| "grad_norm": 54.347625732421875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5256, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.01902222222222222, | |
| "grad_norm": 40.971683502197266, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3704, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.01911111111111111, | |
| "grad_norm": 51.366546630859375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7841, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0192, | |
| "grad_norm": 64.25231170654297, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6423, | |
| "step": 216 | |
| }, | |
| { | |
| "completion_length": 250.4791717529297, | |
| "epoch": 0.01928888888888889, | |
| "grad_norm": 60.69169235229492, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7848, | |
| "reward": 1.0833333730697632, | |
| "reward_std": 0.5039487332105637, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.4583333283662796, | |
| "step": 217, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.01937777777777778, | |
| "grad_norm": 65.37804412841797, | |
| "learning_rate": 3e-06, | |
| "loss": -5.4996, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.019466666666666667, | |
| "grad_norm": 58.69138717651367, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3025, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.019555555555555555, | |
| "grad_norm": 72.17839813232422, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8041, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.019644444444444444, | |
| "grad_norm": 69.56704711914062, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1412, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.019733333333333332, | |
| "grad_norm": 64.57500457763672, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7244, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.019822222222222224, | |
| "grad_norm": 65.06715393066406, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0694, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.019911111111111112, | |
| "grad_norm": 72.74304962158203, | |
| "learning_rate": 3e-06, | |
| "loss": -5.928, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 62.06201934814453, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2234, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.02008888888888889, | |
| "grad_norm": 74.25010681152344, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5055, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.020177777777777777, | |
| "grad_norm": 64.32748413085938, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4819, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.020266666666666665, | |
| "grad_norm": 64.75834655761719, | |
| "learning_rate": 3e-06, | |
| "loss": 5.504, | |
| "step": 228 | |
| }, | |
| { | |
| "completion_length": 252.9375, | |
| "epoch": 0.020355555555555557, | |
| "grad_norm": 86.62522888183594, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2874, | |
| "reward": 1.1562500596046448, | |
| "reward_std": 0.5227071046829224, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.4166666567325592, | |
| "step": 229, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.020444444444444446, | |
| "grad_norm": 61.35566329956055, | |
| "learning_rate": 3e-06, | |
| "loss": 17.7162, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.020533333333333334, | |
| "grad_norm": 61.87510681152344, | |
| "learning_rate": 3e-06, | |
| "loss": 8.9931, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.020622222222222222, | |
| "grad_norm": 57.673770904541016, | |
| "learning_rate": 3e-06, | |
| "loss": 14.8206, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.02071111111111111, | |
| "grad_norm": 64.32942199707031, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9396, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.0208, | |
| "grad_norm": 66.23136138916016, | |
| "learning_rate": 3e-06, | |
| "loss": 15.6923, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.020888888888888887, | |
| "grad_norm": 74.55809783935547, | |
| "learning_rate": 3e-06, | |
| "loss": 13.7889, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.02097777777777778, | |
| "grad_norm": 60.680240631103516, | |
| "learning_rate": 3e-06, | |
| "loss": 17.2098, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.021066666666666668, | |
| "grad_norm": 63.526371002197266, | |
| "learning_rate": 3e-06, | |
| "loss": 8.3714, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.021155555555555556, | |
| "grad_norm": 60.387813568115234, | |
| "learning_rate": 3e-06, | |
| "loss": 13.9642, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.021244444444444444, | |
| "grad_norm": 65.6108169555664, | |
| "learning_rate": 3e-06, | |
| "loss": 10.0042, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.021333333333333333, | |
| "grad_norm": 65.62525177001953, | |
| "learning_rate": 3e-06, | |
| "loss": 14.6078, | |
| "step": 240 | |
| }, | |
| { | |
| "completion_length": 245.89584350585938, | |
| "epoch": 0.02142222222222222, | |
| "grad_norm": 99.76964569091797, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9045, | |
| "reward": 1.2916666865348816, | |
| "reward_std": 0.23116151243448257, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7083333432674408, | |
| "rewards/correctness_reward_func_math": 0.5833333358168602, | |
| "step": 241, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.021511111111111113, | |
| "grad_norm": 34.929359436035156, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1064, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.0216, | |
| "grad_norm": 41.406982421875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9375, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.02168888888888889, | |
| "grad_norm": 40.73991775512695, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6122, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.021777777777777778, | |
| "grad_norm": 32.28548812866211, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2865, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.021866666666666666, | |
| "grad_norm": 37.392860412597656, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2002, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.021955555555555555, | |
| "grad_norm": 105.95482635498047, | |
| "learning_rate": 3e-06, | |
| "loss": -5.7761, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.022044444444444443, | |
| "grad_norm": 35.37491226196289, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3203, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.022133333333333335, | |
| "grad_norm": 37.672000885009766, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3175, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": 34.36002731323242, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8629, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02231111111111111, | |
| "grad_norm": 35.60414123535156, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8192, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.0224, | |
| "grad_norm": 38.58955764770508, | |
| "learning_rate": 3e-06, | |
| "loss": -0.897, | |
| "step": 252 | |
| }, | |
| { | |
| "completion_length": 250.1875, | |
| "epoch": 0.022488888888888888, | |
| "grad_norm": 71.29794311523438, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5796, | |
| "reward": 0.8645833432674408, | |
| "reward_std": 0.3201860636472702, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5729166716337204, | |
| "rewards/correctness_reward_func_math": 0.2916666641831398, | |
| "step": 253, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.022577777777777776, | |
| "grad_norm": 62.11003112792969, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4232, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.02266666666666667, | |
| "grad_norm": 57.18949508666992, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9388, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.022755555555555557, | |
| "grad_norm": 60.49555206298828, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5698, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.022844444444444445, | |
| "grad_norm": 134.1082305908203, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3771, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.022933333333333333, | |
| "grad_norm": 170.15768432617188, | |
| "learning_rate": 3e-06, | |
| "loss": 6.3811, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.02302222222222222, | |
| "grad_norm": 70.64490509033203, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7661, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.02311111111111111, | |
| "grad_norm": 71.96417999267578, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8909, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0232, | |
| "grad_norm": 58.19865417480469, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1442, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.02328888888888889, | |
| "grad_norm": 61.813690185546875, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1458, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.02337777777777778, | |
| "grad_norm": 63.21968460083008, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5992, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.023466666666666667, | |
| "grad_norm": 130.61351013183594, | |
| "learning_rate": 3e-06, | |
| "loss": 4.4745, | |
| "step": 264 | |
| }, | |
| { | |
| "completion_length": 254.58333587646484, | |
| "epoch": 0.023555555555555555, | |
| "grad_norm": 37.4125862121582, | |
| "learning_rate": 3e-06, | |
| "loss": 6.5013, | |
| "reward": 1.0000000298023224, | |
| "reward_std": 0.32049281150102615, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7083333134651184, | |
| "rewards/correctness_reward_func_math": 0.2916666679084301, | |
| "step": 265, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.023644444444444444, | |
| "grad_norm": 47.14967346191406, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9939, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.023733333333333332, | |
| "grad_norm": 52.939048767089844, | |
| "learning_rate": 3e-06, | |
| "loss": -0.9139, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.023822222222222224, | |
| "grad_norm": 86.99070739746094, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1755, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.023911111111111112, | |
| "grad_norm": 39.69975280761719, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7483, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 59.52255630493164, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1483, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.02408888888888889, | |
| "grad_norm": 38.91862106323242, | |
| "learning_rate": 3e-06, | |
| "loss": 6.09, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.024177777777777777, | |
| "grad_norm": 43.66323471069336, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4765, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.024266666666666666, | |
| "grad_norm": 44.54389572143555, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5437, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.024355555555555554, | |
| "grad_norm": 84.41556549072266, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1028, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.024444444444444446, | |
| "grad_norm": 38.220367431640625, | |
| "learning_rate": 3e-06, | |
| "loss": 4.0407, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.024533333333333334, | |
| "grad_norm": 45.620452880859375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5047, | |
| "step": 276 | |
| }, | |
| { | |
| "completion_length": 243.18750762939453, | |
| "epoch": 0.024622222222222222, | |
| "grad_norm": 84.81961059570312, | |
| "learning_rate": 3e-06, | |
| "loss": 5.442, | |
| "reward": 1.3333333730697632, | |
| "reward_std": 0.4887756109237671, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.7083333432674408, | |
| "step": 277, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02471111111111111, | |
| "grad_norm": 112.86151885986328, | |
| "learning_rate": 3e-06, | |
| "loss": -1.2528, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.0248, | |
| "grad_norm": 76.52424621582031, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5325, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.024888888888888887, | |
| "grad_norm": 94.0294189453125, | |
| "learning_rate": 3e-06, | |
| "loss": -6.6167, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.02497777777777778, | |
| "grad_norm": 78.60155487060547, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2653, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.025066666666666668, | |
| "grad_norm": 58.42827224731445, | |
| "learning_rate": 3e-06, | |
| "loss": -5.6591, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.025155555555555556, | |
| "grad_norm": 81.04212188720703, | |
| "learning_rate": 3e-06, | |
| "loss": 4.2739, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.025244444444444444, | |
| "grad_norm": 67.27478790283203, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5776, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.025333333333333333, | |
| "grad_norm": 114.41588592529297, | |
| "learning_rate": 3e-06, | |
| "loss": -5.4532, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.02542222222222222, | |
| "grad_norm": 75.61115264892578, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4401, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.02551111111111111, | |
| "grad_norm": 236.67214965820312, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6833, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.0256, | |
| "grad_norm": 59.2407341003418, | |
| "learning_rate": 3e-06, | |
| "loss": -6.8172, | |
| "step": 288 | |
| }, | |
| { | |
| "completion_length": 250.20834350585938, | |
| "epoch": 0.02568888888888889, | |
| "grad_norm": 93.99871063232422, | |
| "learning_rate": 3e-06, | |
| "loss": -12.9867, | |
| "reward": 1.4687500596046448, | |
| "reward_std": 0.6822589337825775, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.7916666865348816, | |
| "step": 289, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.025777777777777778, | |
| "grad_norm": 79.78562927246094, | |
| "learning_rate": 3e-06, | |
| "loss": -10.387, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.025866666666666666, | |
| "grad_norm": 150.55654907226562, | |
| "learning_rate": 3e-06, | |
| "loss": -11.9684, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.025955555555555555, | |
| "grad_norm": 86.15855407714844, | |
| "learning_rate": 3e-06, | |
| "loss": -13.3488, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.026044444444444443, | |
| "grad_norm": 82.68080139160156, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7978, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.026133333333333335, | |
| "grad_norm": 73.47705841064453, | |
| "learning_rate": 3e-06, | |
| "loss": -15.2661, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.026222222222222223, | |
| "grad_norm": 88.39766693115234, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0408, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.02631111111111111, | |
| "grad_norm": 81.03710174560547, | |
| "learning_rate": 3e-06, | |
| "loss": -11.6435, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.0264, | |
| "grad_norm": 97.82394409179688, | |
| "learning_rate": 3e-06, | |
| "loss": -12.9819, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.026488888888888888, | |
| "grad_norm": 91.29530334472656, | |
| "learning_rate": 3e-06, | |
| "loss": -14.6218, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.026577777777777777, | |
| "grad_norm": 76.14654541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -10.3319, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.02666666666666667, | |
| "grad_norm": 76.4620590209961, | |
| "learning_rate": 3e-06, | |
| "loss": -16.649, | |
| "step": 300 | |
| }, | |
| { | |
| "completion_length": 245.87500762939453, | |
| "epoch": 0.026755555555555557, | |
| "grad_norm": 66.47940063476562, | |
| "learning_rate": 3e-06, | |
| "loss": -2.4273, | |
| "reward": 1.2916666865348816, | |
| "reward_std": 0.4701542556285858, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5416666716337204, | |
| "step": 301, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.026844444444444445, | |
| "grad_norm": 57.04201126098633, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7348, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.026933333333333333, | |
| "grad_norm": 67.42317962646484, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5419, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.027022222222222222, | |
| "grad_norm": 68.1643295288086, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7537, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.02711111111111111, | |
| "grad_norm": 66.15480041503906, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0219, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.0272, | |
| "grad_norm": 72.49027252197266, | |
| "learning_rate": 3e-06, | |
| "loss": -0.992, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.02728888888888889, | |
| "grad_norm": 63.84511947631836, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9532, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.02737777777777778, | |
| "grad_norm": 60.41191864013672, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5425, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.027466666666666667, | |
| "grad_norm": 75.88224029541016, | |
| "learning_rate": 3e-06, | |
| "loss": -0.626, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.027555555555555555, | |
| "grad_norm": 60.12965774536133, | |
| "learning_rate": 3e-06, | |
| "loss": -4.971, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.027644444444444444, | |
| "grad_norm": 67.24330139160156, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5011, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.027733333333333332, | |
| "grad_norm": 66.98039245605469, | |
| "learning_rate": 3e-06, | |
| "loss": -1.487, | |
| "step": 312 | |
| }, | |
| { | |
| "completion_length": 255.27083587646484, | |
| "epoch": 0.027822222222222224, | |
| "grad_norm": 36.04975891113281, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5188, | |
| "reward": 1.5312500596046448, | |
| "reward_std": 0.1546149756759405, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.9166666567325592, | |
| "step": 313, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.027911111111111112, | |
| "grad_norm": 35.71009063720703, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2571, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 43.758975982666016, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5113, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.02808888888888889, | |
| "grad_norm": 42.099124908447266, | |
| "learning_rate": 3e-06, | |
| "loss": -1.203, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.028177777777777777, | |
| "grad_norm": 46.69057846069336, | |
| "learning_rate": 3e-06, | |
| "loss": 2.1907, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.028266666666666666, | |
| "grad_norm": 43.00071334838867, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0413, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.028355555555555554, | |
| "grad_norm": 36.017799377441406, | |
| "learning_rate": 3e-06, | |
| "loss": -0.9995, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.028444444444444446, | |
| "grad_norm": 35.86075973510742, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8159, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.028533333333333334, | |
| "grad_norm": 46.41409683227539, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8693, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.028622222222222223, | |
| "grad_norm": 42.182472229003906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.9042, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.02871111111111111, | |
| "grad_norm": 47.805999755859375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6417, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.0288, | |
| "grad_norm": 45.03670883178711, | |
| "learning_rate": 3e-06, | |
| "loss": -1.09, | |
| "step": 324 | |
| }, | |
| { | |
| "completion_length": 252.64583587646484, | |
| "epoch": 0.028888888888888888, | |
| "grad_norm": 58.358917236328125, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4368, | |
| "reward": 1.1666666865348816, | |
| "reward_std": 0.37967559695243835, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6666666567325592, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 325, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.02897777777777778, | |
| "grad_norm": 70.42740631103516, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5213, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.029066666666666668, | |
| "grad_norm": 71.7884750366211, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7372, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.029155555555555556, | |
| "grad_norm": 64.89356231689453, | |
| "learning_rate": 3e-06, | |
| "loss": -2.411, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.029244444444444444, | |
| "grad_norm": 63.557125091552734, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2777, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.029333333333333333, | |
| "grad_norm": 55.46377182006836, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9502, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02942222222222222, | |
| "grad_norm": 67.63842010498047, | |
| "learning_rate": 3e-06, | |
| "loss": -13.2148, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.02951111111111111, | |
| "grad_norm": 69.31304931640625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.451, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.0296, | |
| "grad_norm": 72.68626403808594, | |
| "learning_rate": 3e-06, | |
| "loss": -15.5911, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.02968888888888889, | |
| "grad_norm": 67.20828247070312, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4952, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.029777777777777778, | |
| "grad_norm": 71.56851959228516, | |
| "learning_rate": 3e-06, | |
| "loss": -6.8577, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.029866666666666666, | |
| "grad_norm": 55.80412292480469, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9959, | |
| "step": 336 | |
| }, | |
| { | |
| "completion_length": 254.33333587646484, | |
| "epoch": 0.029955555555555555, | |
| "grad_norm": 83.18997955322266, | |
| "learning_rate": 3e-06, | |
| "loss": -4.4206, | |
| "reward": 1.3854166865348816, | |
| "reward_std": 0.6009446382522583, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6354166567325592, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 337, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.030044444444444443, | |
| "grad_norm": 76.95658111572266, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0507, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.030133333333333335, | |
| "grad_norm": 99.8234634399414, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7763, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.030222222222222223, | |
| "grad_norm": 89.73624420166016, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8145, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.03031111111111111, | |
| "grad_norm": 105.25814819335938, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3688, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.0304, | |
| "grad_norm": 91.62116241455078, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2119, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.03048888888888889, | |
| "grad_norm": 89.00618743896484, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8364, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.030577777777777777, | |
| "grad_norm": 75.79231262207031, | |
| "learning_rate": 3e-06, | |
| "loss": -1.9053, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.030666666666666665, | |
| "grad_norm": 109.15798950195312, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4347, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.030755555555555557, | |
| "grad_norm": 91.9997787475586, | |
| "learning_rate": 3e-06, | |
| "loss": -15.0307, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.030844444444444445, | |
| "grad_norm": 113.604248046875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8063, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.030933333333333334, | |
| "grad_norm": 90.35537719726562, | |
| "learning_rate": 3e-06, | |
| "loss": -9.993, | |
| "step": 348 | |
| }, | |
| { | |
| "completion_length": 241.06250762939453, | |
| "epoch": 0.031022222222222222, | |
| "grad_norm": 79.29890441894531, | |
| "learning_rate": 3e-06, | |
| "loss": -11.3458, | |
| "reward": 1.6979167461395264, | |
| "reward_std": 0.5608386099338531, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.65625, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 349, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.03111111111111111, | |
| "grad_norm": 85.71048736572266, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4666, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0312, | |
| "grad_norm": 74.05301666259766, | |
| "learning_rate": 3e-06, | |
| "loss": -17.7307, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.03128888888888889, | |
| "grad_norm": 77.7562026977539, | |
| "learning_rate": 3e-06, | |
| "loss": -10.9744, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.031377777777777775, | |
| "grad_norm": 81.5293197631836, | |
| "learning_rate": 3e-06, | |
| "loss": -7.753, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.031466666666666664, | |
| "grad_norm": 74.96295166015625, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9168, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.03155555555555556, | |
| "grad_norm": 77.19646453857422, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4852, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.03164444444444445, | |
| "grad_norm": 88.24592590332031, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2546, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.031733333333333336, | |
| "grad_norm": 69.32099151611328, | |
| "learning_rate": 3e-06, | |
| "loss": -18.7936, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.031822222222222224, | |
| "grad_norm": 74.59849548339844, | |
| "learning_rate": 3e-06, | |
| "loss": -12.5008, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.03191111111111111, | |
| "grad_norm": 89.21590423583984, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1562, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 70.9638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0573, | |
| "step": 360 | |
| }, | |
| { | |
| "completion_length": 250.83334350585938, | |
| "epoch": 0.03208888888888889, | |
| "grad_norm": 72.94464111328125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.781, | |
| "reward": 0.947916716337204, | |
| "reward_std": 0.45044803619384766, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6979166567325592, | |
| "rewards/correctness_reward_func_math": 0.2499999962747097, | |
| "step": 361, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.03217777777777778, | |
| "grad_norm": 91.14832305908203, | |
| "learning_rate": 3e-06, | |
| "loss": -15.6654, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.032266666666666666, | |
| "grad_norm": 74.7421875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.7276, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.032355555555555554, | |
| "grad_norm": 76.62783813476562, | |
| "learning_rate": 3e-06, | |
| "loss": -20.5042, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.03244444444444444, | |
| "grad_norm": 77.55496978759766, | |
| "learning_rate": 3e-06, | |
| "loss": -23.5561, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.03253333333333333, | |
| "grad_norm": 87.73894500732422, | |
| "learning_rate": 3e-06, | |
| "loss": -13.7666, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.03262222222222222, | |
| "grad_norm": 85.25169372558594, | |
| "learning_rate": 3e-06, | |
| "loss": -15.9124, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.032711111111111114, | |
| "grad_norm": 82.08868408203125, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5144, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.0328, | |
| "grad_norm": 88.5888900756836, | |
| "learning_rate": 3e-06, | |
| "loss": -20.7047, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.03288888888888889, | |
| "grad_norm": 87.28410339355469, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9003, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03297777777777778, | |
| "grad_norm": 75.89826965332031, | |
| "learning_rate": 3e-06, | |
| "loss": -25.7767, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.03306666666666667, | |
| "grad_norm": 79.7787094116211, | |
| "learning_rate": 3e-06, | |
| "loss": -16.1167, | |
| "step": 372 | |
| }, | |
| { | |
| "completion_length": 251.9791717529297, | |
| "epoch": 0.033155555555555556, | |
| "grad_norm": 122.57412719726562, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3842, | |
| "reward": 1.1666666865348816, | |
| "reward_std": 0.7582502365112305, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5833333134651184, | |
| "rewards/correctness_reward_func_math": 0.5833333283662796, | |
| "step": 373, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.033244444444444445, | |
| "grad_norm": 103.27584838867188, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3769, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": 111.76622772216797, | |
| "learning_rate": 3e-06, | |
| "loss": 5.4646, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.03342222222222222, | |
| "grad_norm": 110.36207580566406, | |
| "learning_rate": 3e-06, | |
| "loss": -9.271, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.03351111111111111, | |
| "grad_norm": 102.65152740478516, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8725, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.0336, | |
| "grad_norm": 107.27348327636719, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3279, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.033688888888888886, | |
| "grad_norm": 118.1567153930664, | |
| "learning_rate": 3e-06, | |
| "loss": -8.1813, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.033777777777777775, | |
| "grad_norm": 98.9560317993164, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3073, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.03386666666666667, | |
| "grad_norm": 119.64665222167969, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1117, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.03395555555555556, | |
| "grad_norm": 118.64970397949219, | |
| "learning_rate": 3e-06, | |
| "loss": -10.3727, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.03404444444444445, | |
| "grad_norm": 117.53937530517578, | |
| "learning_rate": 3e-06, | |
| "loss": -3.6092, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.034133333333333335, | |
| "grad_norm": 108.86544799804688, | |
| "learning_rate": 3e-06, | |
| "loss": -6.4896, | |
| "step": 384 | |
| }, | |
| { | |
| "completion_length": 242.27084350585938, | |
| "epoch": 0.03422222222222222, | |
| "grad_norm": 37.28844451904297, | |
| "learning_rate": 3e-06, | |
| "loss": 35.3752, | |
| "reward": 1.0208333432674408, | |
| "reward_std": 0.12909945845603943, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.3333333358168602, | |
| "step": 385, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.03431111111111111, | |
| "grad_norm": 34.82659149169922, | |
| "learning_rate": 3e-06, | |
| "loss": 32.1003, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.0344, | |
| "grad_norm": 34.34743881225586, | |
| "learning_rate": 3e-06, | |
| "loss": 31.1165, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.03448888888888889, | |
| "grad_norm": 44.72328186035156, | |
| "learning_rate": 3e-06, | |
| "loss": 32.7756, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.03457777777777778, | |
| "grad_norm": 42.72700119018555, | |
| "learning_rate": 3e-06, | |
| "loss": 32.2398, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.034666666666666665, | |
| "grad_norm": 47.69383239746094, | |
| "learning_rate": 3e-06, | |
| "loss": 33.0683, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03475555555555555, | |
| "grad_norm": 39.66519546508789, | |
| "learning_rate": 3e-06, | |
| "loss": 34.5053, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.03484444444444444, | |
| "grad_norm": 39.71942138671875, | |
| "learning_rate": 3e-06, | |
| "loss": 31.0092, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.03493333333333333, | |
| "grad_norm": 36.60993576049805, | |
| "learning_rate": 3e-06, | |
| "loss": 30.3034, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.035022222222222225, | |
| "grad_norm": 47.912837982177734, | |
| "learning_rate": 3e-06, | |
| "loss": 31.4023, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.035111111111111114, | |
| "grad_norm": 42.3475341796875, | |
| "learning_rate": 3e-06, | |
| "loss": 31.2326, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.0352, | |
| "grad_norm": 40.417381286621094, | |
| "learning_rate": 3e-06, | |
| "loss": 31.5571, | |
| "step": 396 | |
| }, | |
| { | |
| "completion_length": 251.56250762939453, | |
| "epoch": 0.03528888888888889, | |
| "grad_norm": 41.37530517578125, | |
| "learning_rate": 3e-06, | |
| "loss": 9.2621, | |
| "reward": 1.6458333730697632, | |
| "reward_std": 0.1489431317895651, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7291666865348816, | |
| "rewards/correctness_reward_func_math": 0.9166666865348816, | |
| "step": 397, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.03537777777777778, | |
| "grad_norm": 42.853084564208984, | |
| "learning_rate": 3e-06, | |
| "loss": 8.8572, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.03546666666666667, | |
| "grad_norm": 41.449344635009766, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6158, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.035555555555555556, | |
| "grad_norm": 58.31279754638672, | |
| "learning_rate": 3e-06, | |
| "loss": 7.968, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.035644444444444444, | |
| "grad_norm": 48.664459228515625, | |
| "learning_rate": 3e-06, | |
| "loss": 11.1793, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.03573333333333333, | |
| "grad_norm": 45.02378845214844, | |
| "learning_rate": 3e-06, | |
| "loss": 7.6242, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.03582222222222222, | |
| "grad_norm": 43.53935241699219, | |
| "learning_rate": 3e-06, | |
| "loss": 8.0172, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.03591111111111111, | |
| "grad_norm": 42.496604919433594, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5088, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 44.294986724853516, | |
| "learning_rate": 3e-06, | |
| "loss": 3.9932, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.036088888888888886, | |
| "grad_norm": 73.07268524169922, | |
| "learning_rate": 3e-06, | |
| "loss": 6.5222, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.03617777777777778, | |
| "grad_norm": 44.31553649902344, | |
| "learning_rate": 3e-06, | |
| "loss": 9.2936, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.03626666666666667, | |
| "grad_norm": 48.42079162597656, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8115, | |
| "step": 408 | |
| }, | |
| { | |
| "completion_length": 253.7291717529297, | |
| "epoch": 0.03635555555555556, | |
| "grad_norm": 55.15653991699219, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6673, | |
| "reward": 0.90625, | |
| "reward_std": 0.15461495518684387, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.1666666716337204, | |
| "step": 409, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.036444444444444446, | |
| "grad_norm": 54.93301773071289, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7365, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.036533333333333334, | |
| "grad_norm": 50.56829071044922, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3972, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.03662222222222222, | |
| "grad_norm": 50.894187927246094, | |
| "learning_rate": 3e-06, | |
| "loss": 0.7298, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.03671111111111111, | |
| "grad_norm": 64.99378204345703, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6822, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.0368, | |
| "grad_norm": 53.45103454589844, | |
| "learning_rate": 3e-06, | |
| "loss": 2.1252, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.03688888888888889, | |
| "grad_norm": 58.880393981933594, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8912, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.036977777777777776, | |
| "grad_norm": 52.00230407714844, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9087, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.037066666666666664, | |
| "grad_norm": 54.192508697509766, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5202, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.03715555555555555, | |
| "grad_norm": 52.10379409790039, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0026, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.03724444444444444, | |
| "grad_norm": 56.19913864135742, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9843, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.037333333333333336, | |
| "grad_norm": 51.573699951171875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9967, | |
| "step": 420 | |
| }, | |
| { | |
| "completion_length": 241.95833587646484, | |
| "epoch": 0.037422222222222225, | |
| "grad_norm": 65.98304748535156, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8524, | |
| "reward": 1.0937500596046448, | |
| "reward_std": 0.3994170129299164, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.59375, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 421, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.03751111111111111, | |
| "grad_norm": 68.6063232421875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.8346, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.0376, | |
| "grad_norm": 61.650146484375, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9892, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.03768888888888889, | |
| "grad_norm": 66.45751953125, | |
| "learning_rate": 3e-06, | |
| "loss": -3.349, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.03777777777777778, | |
| "grad_norm": 66.84425354003906, | |
| "learning_rate": 3e-06, | |
| "loss": 2.716, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.037866666666666667, | |
| "grad_norm": 61.292354583740234, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6447, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.037955555555555555, | |
| "grad_norm": 66.02394104003906, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1078, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.03804444444444444, | |
| "grad_norm": 62.10107421875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9971, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.03813333333333333, | |
| "grad_norm": 85.67212677001953, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8248, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.03822222222222222, | |
| "grad_norm": 60.61140060424805, | |
| "learning_rate": 3e-06, | |
| "loss": -4.433, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03831111111111111, | |
| "grad_norm": 62.81836700439453, | |
| "learning_rate": 3e-06, | |
| "loss": 1.751, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.0384, | |
| "grad_norm": 60.05256652832031, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8432, | |
| "step": 432 | |
| }, | |
| { | |
| "completion_length": 247.27083587646484, | |
| "epoch": 0.03848888888888889, | |
| "grad_norm": 83.67351531982422, | |
| "learning_rate": 3e-06, | |
| "loss": 6.9951, | |
| "reward": 1.3229166865348816, | |
| "reward_std": 0.3936077058315277, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.5833333544433117, | |
| "step": 433, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.03857777777777778, | |
| "grad_norm": 71.3005599975586, | |
| "learning_rate": 3e-06, | |
| "loss": 4.71, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.03866666666666667, | |
| "grad_norm": 81.9188003540039, | |
| "learning_rate": 3e-06, | |
| "loss": 8.7788, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.03875555555555556, | |
| "grad_norm": 79.42195129394531, | |
| "learning_rate": 3e-06, | |
| "loss": 11.129, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.038844444444444445, | |
| "grad_norm": 84.69261169433594, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7206, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.038933333333333334, | |
| "grad_norm": 74.52008819580078, | |
| "learning_rate": 3e-06, | |
| "loss": 12.7332, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.03902222222222222, | |
| "grad_norm": 93.8567123413086, | |
| "learning_rate": 3e-06, | |
| "loss": 6.2331, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.03911111111111111, | |
| "grad_norm": 79.21229553222656, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7102, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0392, | |
| "grad_norm": 74.53849792480469, | |
| "learning_rate": 3e-06, | |
| "loss": 8.0091, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.03928888888888889, | |
| "grad_norm": 79.04943084716797, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4256, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.039377777777777775, | |
| "grad_norm": 81.54142761230469, | |
| "learning_rate": 3e-06, | |
| "loss": 9.0032, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.039466666666666664, | |
| "grad_norm": 83.44096374511719, | |
| "learning_rate": 3e-06, | |
| "loss": 11.701, | |
| "step": 444 | |
| }, | |
| { | |
| "completion_length": 249.2291717529297, | |
| "epoch": 0.03955555555555555, | |
| "grad_norm": 78.02682495117188, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3882, | |
| "reward": 1.1458333730697632, | |
| "reward_std": 0.33968228101730347, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6041666865348816, | |
| "rewards/correctness_reward_func_math": 0.5416666679084301, | |
| "step": 445, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.03964444444444445, | |
| "grad_norm": 77.9029312133789, | |
| "learning_rate": 3e-06, | |
| "loss": -10.5754, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.039733333333333336, | |
| "grad_norm": 78.25541687011719, | |
| "learning_rate": 3e-06, | |
| "loss": -4.1056, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.039822222222222224, | |
| "grad_norm": 100.47134399414062, | |
| "learning_rate": 3e-06, | |
| "loss": -10.7897, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.03991111111111111, | |
| "grad_norm": 74.78359985351562, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3144, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 96.55880737304688, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8763, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04008888888888889, | |
| "grad_norm": 87.14449310302734, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2308, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.04017777777777778, | |
| "grad_norm": 97.77748107910156, | |
| "learning_rate": 3e-06, | |
| "loss": -11.2666, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.040266666666666666, | |
| "grad_norm": 73.44164276123047, | |
| "learning_rate": 3e-06, | |
| "loss": -5.678, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.040355555555555554, | |
| "grad_norm": 100.09737396240234, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4908, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.04044444444444444, | |
| "grad_norm": 74.52122497558594, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6635, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.04053333333333333, | |
| "grad_norm": 71.74639892578125, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7931, | |
| "step": 456 | |
| }, | |
| { | |
| "completion_length": 252.52084350585938, | |
| "epoch": 0.04062222222222222, | |
| "grad_norm": 102.89392852783203, | |
| "learning_rate": 3e-06, | |
| "loss": 12.935, | |
| "reward": 1.0625000596046448, | |
| "reward_std": 0.43528568744659424, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.3750000111758709, | |
| "step": 457, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.040711111111111115, | |
| "grad_norm": 105.67884063720703, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3682, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.0408, | |
| "grad_norm": 90.33670806884766, | |
| "learning_rate": 3e-06, | |
| "loss": 14.3722, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.04088888888888889, | |
| "grad_norm": 109.0367431640625, | |
| "learning_rate": 3e-06, | |
| "loss": 11.6758, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.04097777777777778, | |
| "grad_norm": 108.55673217773438, | |
| "learning_rate": 3e-06, | |
| "loss": 13.7948, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.04106666666666667, | |
| "grad_norm": 91.73406982421875, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7516, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.041155555555555556, | |
| "grad_norm": 94.59785461425781, | |
| "learning_rate": 3e-06, | |
| "loss": 11.5977, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.041244444444444445, | |
| "grad_norm": 78.94522857666016, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9425, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.04133333333333333, | |
| "grad_norm": 93.91060638427734, | |
| "learning_rate": 3e-06, | |
| "loss": 12.3454, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.04142222222222222, | |
| "grad_norm": 98.78986358642578, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3247, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.04151111111111111, | |
| "grad_norm": 111.63731384277344, | |
| "learning_rate": 3e-06, | |
| "loss": 12.2581, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.0416, | |
| "grad_norm": 88.63348388671875, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7115, | |
| "step": 468 | |
| }, | |
| { | |
| "completion_length": 235.58333587646484, | |
| "epoch": 0.041688888888888886, | |
| "grad_norm": 70.78716278076172, | |
| "learning_rate": 3e-06, | |
| "loss": -40.1334, | |
| "reward": 1.4687500596046448, | |
| "reward_std": 0.38577648997306824, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.7916666865348816, | |
| "step": 469, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.041777777777777775, | |
| "grad_norm": 93.38511657714844, | |
| "learning_rate": 3e-06, | |
| "loss": -36.9438, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.04186666666666667, | |
| "grad_norm": 84.67546081542969, | |
| "learning_rate": 3e-06, | |
| "loss": -43.221, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.04195555555555556, | |
| "grad_norm": 99.22493743896484, | |
| "learning_rate": 3e-06, | |
| "loss": -43.1284, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.04204444444444445, | |
| "grad_norm": 77.45098876953125, | |
| "learning_rate": 3e-06, | |
| "loss": -40.9152, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.042133333333333335, | |
| "grad_norm": 70.95470428466797, | |
| "learning_rate": 3e-06, | |
| "loss": -32.8293, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.042222222222222223, | |
| "grad_norm": 77.2640609741211, | |
| "learning_rate": 3e-06, | |
| "loss": -41.1009, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.04231111111111111, | |
| "grad_norm": 84.97554779052734, | |
| "learning_rate": 3e-06, | |
| "loss": -37.7295, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.0424, | |
| "grad_norm": 67.63621520996094, | |
| "learning_rate": 3e-06, | |
| "loss": -44.2423, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.04248888888888889, | |
| "grad_norm": 99.13645935058594, | |
| "learning_rate": 3e-06, | |
| "loss": -45.4078, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.04257777777777778, | |
| "grad_norm": 71.1223373413086, | |
| "learning_rate": 3e-06, | |
| "loss": -43.0538, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "grad_norm": 81.17517852783203, | |
| "learning_rate": 3e-06, | |
| "loss": -34.5254, | |
| "step": 480 | |
| }, | |
| { | |
| "completion_length": 250.5625, | |
| "epoch": 0.042755555555555554, | |
| "grad_norm": 111.48869323730469, | |
| "learning_rate": 3e-06, | |
| "loss": 20.9161, | |
| "reward": 1.2291666865348816, | |
| "reward_std": 0.26686520874500275, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6041666865348816, | |
| "rewards/correctness_reward_func_math": 0.6250000223517418, | |
| "step": 481, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.04284444444444444, | |
| "grad_norm": 110.74134063720703, | |
| "learning_rate": 3e-06, | |
| "loss": 21.5354, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.04293333333333333, | |
| "grad_norm": 121.27849578857422, | |
| "learning_rate": 3e-06, | |
| "loss": 22.1143, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.043022222222222226, | |
| "grad_norm": 111.5245590209961, | |
| "learning_rate": 3e-06, | |
| "loss": 27.6408, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.043111111111111114, | |
| "grad_norm": 100.0729751586914, | |
| "learning_rate": 3e-06, | |
| "loss": 24.0997, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.0432, | |
| "grad_norm": 123.4867935180664, | |
| "learning_rate": 3e-06, | |
| "loss": 19.3324, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.04328888888888889, | |
| "grad_norm": 107.24234008789062, | |
| "learning_rate": 3e-06, | |
| "loss": 18.5364, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.04337777777777778, | |
| "grad_norm": 113.5108413696289, | |
| "learning_rate": 3e-06, | |
| "loss": 18.1962, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.04346666666666667, | |
| "grad_norm": 136.6710968017578, | |
| "learning_rate": 3e-06, | |
| "loss": 17.9099, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.043555555555555556, | |
| "grad_norm": 110.78118896484375, | |
| "learning_rate": 3e-06, | |
| "loss": 24.1538, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.043644444444444444, | |
| "grad_norm": 87.0732192993164, | |
| "learning_rate": 3e-06, | |
| "loss": 20.6286, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.04373333333333333, | |
| "grad_norm": 111.51911163330078, | |
| "learning_rate": 3e-06, | |
| "loss": 15.8689, | |
| "step": 492 | |
| }, | |
| { | |
| "completion_length": 232.1666717529297, | |
| "epoch": 0.04382222222222222, | |
| "grad_norm": 58.495811462402344, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5791, | |
| "reward": 1.1145833730697632, | |
| "reward_std": 0.20219221711158752, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 493, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.04391111111111111, | |
| "grad_norm": 66.24677276611328, | |
| "learning_rate": 3e-06, | |
| "loss": 8.869, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 75.63920593261719, | |
| "learning_rate": 3e-06, | |
| "loss": 6.6083, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.044088888888888886, | |
| "grad_norm": 41.957889556884766, | |
| "learning_rate": 3e-06, | |
| "loss": 6.373, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.04417777777777778, | |
| "grad_norm": 56.327693939208984, | |
| "learning_rate": 3e-06, | |
| "loss": 8.5285, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.04426666666666667, | |
| "grad_norm": 56.58005905151367, | |
| "learning_rate": 3e-06, | |
| "loss": 8.228, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.04435555555555556, | |
| "grad_norm": 56.65522766113281, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1127, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 52.785221099853516, | |
| "learning_rate": 3e-06, | |
| "loss": 8.324, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.044533333333333334, | |
| "grad_norm": 52.37721252441406, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6566, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.04462222222222222, | |
| "grad_norm": 50.5732307434082, | |
| "learning_rate": 3e-06, | |
| "loss": 5.266, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.04471111111111111, | |
| "grad_norm": 59.614261627197266, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5254, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.0448, | |
| "grad_norm": 57.78561782836914, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7155, | |
| "step": 504 | |
| }, | |
| { | |
| "completion_length": 249.5416717529297, | |
| "epoch": 0.04488888888888889, | |
| "grad_norm": 75.29253387451172, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5945, | |
| "reward": 1.1979166865348816, | |
| "reward_std": 0.3113893121480942, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.65625, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 505, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.044977777777777776, | |
| "grad_norm": 71.35801696777344, | |
| "learning_rate": 3e-06, | |
| "loss": 6.5836, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.045066666666666665, | |
| "grad_norm": 68.37297058105469, | |
| "learning_rate": 3e-06, | |
| "loss": 2.1054, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.04515555555555555, | |
| "grad_norm": 67.4723892211914, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3974, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.04524444444444444, | |
| "grad_norm": 84.1152114868164, | |
| "learning_rate": 3e-06, | |
| "loss": 0.1293, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.04533333333333334, | |
| "grad_norm": 95.73898315429688, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5888, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.045422222222222225, | |
| "grad_norm": 73.4489974975586, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6248, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.04551111111111111, | |
| "grad_norm": 67.3970947265625, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0044, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.0456, | |
| "grad_norm": 68.55184936523438, | |
| "learning_rate": 3e-06, | |
| "loss": 1.048, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.04568888888888889, | |
| "grad_norm": 72.8236312866211, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9001, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.04577777777777778, | |
| "grad_norm": 72.06463623046875, | |
| "learning_rate": 3e-06, | |
| "loss": -1.2144, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.04586666666666667, | |
| "grad_norm": 87.04244995117188, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3325, | |
| "step": 516 | |
| }, | |
| { | |
| "completion_length": 235.27083587646484, | |
| "epoch": 0.045955555555555555, | |
| "grad_norm": 76.81670379638672, | |
| "learning_rate": 3e-06, | |
| "loss": -18.2768, | |
| "reward": 1.2395833730697632, | |
| "reward_std": 0.4973409175872803, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 517, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.04604444444444444, | |
| "grad_norm": 129.93211364746094, | |
| "learning_rate": 3e-06, | |
| "loss": -20.3578, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.04613333333333333, | |
| "grad_norm": 87.40878295898438, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4647, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.04622222222222222, | |
| "grad_norm": 88.27845764160156, | |
| "learning_rate": 3e-06, | |
| "loss": -23.9652, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.04631111111111111, | |
| "grad_norm": 74.01776123046875, | |
| "learning_rate": 3e-06, | |
| "loss": -23.0054, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.0464, | |
| "grad_norm": 86.09662628173828, | |
| "learning_rate": 3e-06, | |
| "loss": -16.2859, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.04648888888888889, | |
| "grad_norm": 86.07221221923828, | |
| "learning_rate": 3e-06, | |
| "loss": -19.6548, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.04657777777777778, | |
| "grad_norm": 81.51930236816406, | |
| "learning_rate": 3e-06, | |
| "loss": -22.0114, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.04666666666666667, | |
| "grad_norm": 85.83316802978516, | |
| "learning_rate": 3e-06, | |
| "loss": -21.3756, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.04675555555555556, | |
| "grad_norm": 80.6872787475586, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6275, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.046844444444444445, | |
| "grad_norm": 82.79136657714844, | |
| "learning_rate": 3e-06, | |
| "loss": -24.846, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.046933333333333334, | |
| "grad_norm": 91.39631652832031, | |
| "learning_rate": 3e-06, | |
| "loss": -17.8527, | |
| "step": 528 | |
| }, | |
| { | |
| "completion_length": 251.77084350585938, | |
| "epoch": 0.04702222222222222, | |
| "grad_norm": 58.933895111083984, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5821, | |
| "reward": 1.7083333730697632, | |
| "reward_std": 0.29204893112182617, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7083333432674408, | |
| "rewards/correctness_reward_func_math": 1.0, | |
| "step": 529, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.04711111111111111, | |
| "grad_norm": 70.82673645019531, | |
| "learning_rate": 3e-06, | |
| "loss": 4.666, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.0472, | |
| "grad_norm": 62.45388412475586, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8319, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.04728888888888889, | |
| "grad_norm": 68.45257568359375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2921, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.047377777777777776, | |
| "grad_norm": 63.96629333496094, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1791, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.047466666666666664, | |
| "grad_norm": 71.47022247314453, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0559, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.04755555555555555, | |
| "grad_norm": 69.21123504638672, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1193, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.04764444444444445, | |
| "grad_norm": 54.05726623535156, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7663, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.047733333333333336, | |
| "grad_norm": 57.079166412353516, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8682, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.047822222222222224, | |
| "grad_norm": 89.69593811035156, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0232, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.04791111111111111, | |
| "grad_norm": 68.15176391601562, | |
| "learning_rate": 3e-06, | |
| "loss": 6.4281, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 71.53436279296875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8678, | |
| "step": 540 | |
| }, | |
| { | |
| "completion_length": 248.4791717529297, | |
| "epoch": 0.04808888888888889, | |
| "grad_norm": 111.93199157714844, | |
| "learning_rate": 3e-06, | |
| "loss": -3.1821, | |
| "reward": 1.3125, | |
| "reward_std": 0.7091469466686249, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 541, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.04817777777777778, | |
| "grad_norm": 104.72472381591797, | |
| "learning_rate": 3e-06, | |
| "loss": -9.114, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.048266666666666666, | |
| "grad_norm": 137.42185974121094, | |
| "learning_rate": 3e-06, | |
| "loss": -12.9595, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.048355555555555554, | |
| "grad_norm": 115.6964340209961, | |
| "learning_rate": 3e-06, | |
| "loss": -16.5078, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.04844444444444444, | |
| "grad_norm": 107.43921661376953, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0856, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.04853333333333333, | |
| "grad_norm": 139.91366577148438, | |
| "learning_rate": 3e-06, | |
| "loss": -8.5451, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.04862222222222222, | |
| "grad_norm": 107.25052642822266, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7342, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.04871111111111111, | |
| "grad_norm": 104.6925048828125, | |
| "learning_rate": 3e-06, | |
| "loss": -10.6582, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.0488, | |
| "grad_norm": 108.13795471191406, | |
| "learning_rate": 3e-06, | |
| "loss": -14.883, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.04888888888888889, | |
| "grad_norm": 108.62395477294922, | |
| "learning_rate": 3e-06, | |
| "loss": -19.0199, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04897777777777778, | |
| "grad_norm": 103.06570434570312, | |
| "learning_rate": 3e-06, | |
| "loss": -14.306, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.04906666666666667, | |
| "grad_norm": 125.94219970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -10.2489, | |
| "step": 552 | |
| }, | |
| { | |
| "completion_length": 221.43750762939453, | |
| "epoch": 0.049155555555555557, | |
| "grad_norm": 90.84999084472656, | |
| "learning_rate": 3e-06, | |
| "loss": 12.0477, | |
| "reward": 2.0104166865348816, | |
| "reward_std": 0.406316339969635, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 1.3333333730697632, | |
| "step": 553, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.049244444444444445, | |
| "grad_norm": 81.99716186523438, | |
| "learning_rate": 3e-06, | |
| "loss": 11.4046, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.04933333333333333, | |
| "grad_norm": 89.69168090820312, | |
| "learning_rate": 3e-06, | |
| "loss": 15.5262, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.04942222222222222, | |
| "grad_norm": 93.98981475830078, | |
| "learning_rate": 3e-06, | |
| "loss": 15.7367, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.04951111111111111, | |
| "grad_norm": 86.68983459472656, | |
| "learning_rate": 3e-06, | |
| "loss": 5.511, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.0496, | |
| "grad_norm": 78.73108673095703, | |
| "learning_rate": 3e-06, | |
| "loss": 13.5853, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.04968888888888889, | |
| "grad_norm": 88.60321044921875, | |
| "learning_rate": 3e-06, | |
| "loss": 10.8131, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.049777777777777775, | |
| "grad_norm": 78.82019805908203, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9253, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04986666666666666, | |
| "grad_norm": 91.8647232055664, | |
| "learning_rate": 3e-06, | |
| "loss": 14.6415, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.04995555555555556, | |
| "grad_norm": 106.29496765136719, | |
| "learning_rate": 3e-06, | |
| "loss": 14.4603, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.05004444444444445, | |
| "grad_norm": 72.984130859375, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1028, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.050133333333333335, | |
| "grad_norm": 88.46710205078125, | |
| "learning_rate": 3e-06, | |
| "loss": 11.4185, | |
| "step": 564 | |
| }, | |
| { | |
| "completion_length": 238.89584350585938, | |
| "epoch": 0.050222222222222224, | |
| "grad_norm": 91.9970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0724, | |
| "reward": 1.0729166865348816, | |
| "reward_std": 0.5935818552970886, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.4583333283662796, | |
| "step": 565, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.05031111111111111, | |
| "grad_norm": 101.35919952392578, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1114, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.0504, | |
| "grad_norm": 100.29364776611328, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7467, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.05048888888888889, | |
| "grad_norm": 93.71849822998047, | |
| "learning_rate": 3e-06, | |
| "loss": 7.819, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.05057777777777778, | |
| "grad_norm": 84.14008331298828, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9591, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.050666666666666665, | |
| "grad_norm": 91.14092254638672, | |
| "learning_rate": 3e-06, | |
| "loss": -7.111, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.050755555555555554, | |
| "grad_norm": 93.15682220458984, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5642, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.05084444444444444, | |
| "grad_norm": 113.89299011230469, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5357, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.05093333333333333, | |
| "grad_norm": 91.64227294921875, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2068, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.05102222222222222, | |
| "grad_norm": 108.78038024902344, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8992, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.051111111111111114, | |
| "grad_norm": 83.73683166503906, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9239, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.0512, | |
| "grad_norm": 143.7006378173828, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4902, | |
| "step": 576 | |
| }, | |
| { | |
| "completion_length": 243.7291717529297, | |
| "epoch": 0.05128888888888889, | |
| "grad_norm": 82.69302368164062, | |
| "learning_rate": 3e-06, | |
| "loss": 26.9064, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.4701542556285858, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.708333358168602, | |
| "step": 577, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.05137777777777778, | |
| "grad_norm": 77.71623992919922, | |
| "learning_rate": 3e-06, | |
| "loss": 24.0495, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.05146666666666667, | |
| "grad_norm": 80.28130340576172, | |
| "learning_rate": 3e-06, | |
| "loss": 19.1044, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.051555555555555556, | |
| "grad_norm": 86.22237396240234, | |
| "learning_rate": 3e-06, | |
| "loss": 22.9667, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.051644444444444444, | |
| "grad_norm": 94.3071060180664, | |
| "learning_rate": 3e-06, | |
| "loss": 17.6854, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.05173333333333333, | |
| "grad_norm": 86.01050567626953, | |
| "learning_rate": 3e-06, | |
| "loss": 28.4794, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.05182222222222222, | |
| "grad_norm": 87.72801971435547, | |
| "learning_rate": 3e-06, | |
| "loss": 25.3194, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.05191111111111111, | |
| "grad_norm": 74.66322326660156, | |
| "learning_rate": 3e-06, | |
| "loss": 22.9833, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 90.72804260253906, | |
| "learning_rate": 3e-06, | |
| "loss": 17.4641, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.052088888888888886, | |
| "grad_norm": 87.39917755126953, | |
| "learning_rate": 3e-06, | |
| "loss": 21.6816, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.052177777777777774, | |
| "grad_norm": 85.64997100830078, | |
| "learning_rate": 3e-06, | |
| "loss": 15.9135, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.05226666666666667, | |
| "grad_norm": 92.35039520263672, | |
| "learning_rate": 3e-06, | |
| "loss": 26.4856, | |
| "step": 588 | |
| }, | |
| { | |
| "completion_length": 248.3541717529297, | |
| "epoch": 0.05235555555555556, | |
| "grad_norm": 93.8902359008789, | |
| "learning_rate": 3e-06, | |
| "loss": 14.9976, | |
| "reward": 1.3541666865348816, | |
| "reward_std": 0.5839263796806335, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7291666865348816, | |
| "rewards/correctness_reward_func_math": 0.6250000149011612, | |
| "step": 589, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.052444444444444446, | |
| "grad_norm": 98.34622192382812, | |
| "learning_rate": 3e-06, | |
| "loss": 12.6228, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.052533333333333335, | |
| "grad_norm": 108.93590545654297, | |
| "learning_rate": 3e-06, | |
| "loss": 9.3556, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.05262222222222222, | |
| "grad_norm": 114.49545288085938, | |
| "learning_rate": 3e-06, | |
| "loss": 23.8665, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.05271111111111111, | |
| "grad_norm": 102.37223052978516, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3813, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.0528, | |
| "grad_norm": 98.17306518554688, | |
| "learning_rate": 3e-06, | |
| "loss": 13.3691, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.05288888888888889, | |
| "grad_norm": 95.84387969970703, | |
| "learning_rate": 3e-06, | |
| "loss": 13.8857, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.052977777777777776, | |
| "grad_norm": 101.44741821289062, | |
| "learning_rate": 3e-06, | |
| "loss": 11.0637, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.053066666666666665, | |
| "grad_norm": 96.87228393554688, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1168, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.05315555555555555, | |
| "grad_norm": 129.4097442626953, | |
| "learning_rate": 3e-06, | |
| "loss": 22.4722, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.05324444444444444, | |
| "grad_norm": 100.4094467163086, | |
| "learning_rate": 3e-06, | |
| "loss": 4.9249, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 97.2879638671875, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7864, | |
| "step": 600 | |
| }, | |
| { | |
| "completion_length": 249.31250762939453, | |
| "epoch": 0.053422222222222225, | |
| "grad_norm": 81.29678344726562, | |
| "learning_rate": 3e-06, | |
| "loss": -1.4361, | |
| "reward": 1.6458333730697632, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 601, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.05351111111111111, | |
| "grad_norm": 71.18397521972656, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7138, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.0536, | |
| "grad_norm": 79.09825897216797, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5626, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.05368888888888889, | |
| "grad_norm": 88.4588394165039, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3433, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.05377777777777778, | |
| "grad_norm": 75.85662841796875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.126, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.05386666666666667, | |
| "grad_norm": 76.75032806396484, | |
| "learning_rate": 3e-06, | |
| "loss": -4.1707, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.053955555555555555, | |
| "grad_norm": 96.06957244873047, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3824, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.054044444444444444, | |
| "grad_norm": 108.4106674194336, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8022, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.05413333333333333, | |
| "grad_norm": 82.68360900878906, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1987, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.05422222222222222, | |
| "grad_norm": 92.35367584228516, | |
| "learning_rate": 3e-06, | |
| "loss": -9.379, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.05431111111111111, | |
| "grad_norm": 78.61454010009766, | |
| "learning_rate": 3e-06, | |
| "loss": -4.4421, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.0544, | |
| "grad_norm": 83.68685150146484, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8651, | |
| "step": 612 | |
| }, | |
| { | |
| "completion_length": 252.8541717529297, | |
| "epoch": 0.05448888888888889, | |
| "grad_norm": 156.99725341796875, | |
| "learning_rate": 3e-06, | |
| "loss": -38.1088, | |
| "reward": 1.2395833730697632, | |
| "reward_std": 0.3936076909303665, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 613, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.05457777777777778, | |
| "grad_norm": 82.39892578125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.9067, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.05466666666666667, | |
| "grad_norm": 76.3405532836914, | |
| "learning_rate": 3e-06, | |
| "loss": -21.2904, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.05475555555555556, | |
| "grad_norm": 114.32887268066406, | |
| "learning_rate": 3e-06, | |
| "loss": -41.3273, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.054844444444444446, | |
| "grad_norm": 90.10194396972656, | |
| "learning_rate": 3e-06, | |
| "loss": -19.8759, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.054933333333333334, | |
| "grad_norm": 101.73013305664062, | |
| "learning_rate": 3e-06, | |
| "loss": -31.9803, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.05502222222222222, | |
| "grad_norm": 107.18656921386719, | |
| "learning_rate": 3e-06, | |
| "loss": -38.8443, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.05511111111111111, | |
| "grad_norm": 85.78763580322266, | |
| "learning_rate": 3e-06, | |
| "loss": -26.4852, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.0552, | |
| "grad_norm": 78.22523498535156, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9265, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.05528888888888889, | |
| "grad_norm": 120.62594604492188, | |
| "learning_rate": 3e-06, | |
| "loss": -44.6351, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.055377777777777776, | |
| "grad_norm": 93.32075500488281, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4564, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.055466666666666664, | |
| "grad_norm": 97.2727279663086, | |
| "learning_rate": 3e-06, | |
| "loss": -34.0292, | |
| "step": 624 | |
| }, | |
| { | |
| "completion_length": 247.5, | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 98.01384735107422, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8639, | |
| "reward": 1.9895833730697632, | |
| "reward_std": 0.6326004266738892, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 1.25, | |
| "step": 625, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.05564444444444445, | |
| "grad_norm": 110.7737045288086, | |
| "learning_rate": 3e-06, | |
| "loss": -26.4317, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.055733333333333336, | |
| "grad_norm": 100.14824676513672, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6826, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.055822222222222224, | |
| "grad_norm": 95.32125854492188, | |
| "learning_rate": 3e-06, | |
| "loss": -13.7046, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.05591111111111111, | |
| "grad_norm": 153.66207885742188, | |
| "learning_rate": 3e-06, | |
| "loss": -33.0736, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 98.00647735595703, | |
| "learning_rate": 3e-06, | |
| "loss": -23.4488, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.05608888888888889, | |
| "grad_norm": 101.93690490722656, | |
| "learning_rate": 3e-06, | |
| "loss": -27.788, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.05617777777777778, | |
| "grad_norm": 109.6976089477539, | |
| "learning_rate": 3e-06, | |
| "loss": -27.8496, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.056266666666666666, | |
| "grad_norm": 94.93986511230469, | |
| "learning_rate": 3e-06, | |
| "loss": -18.741, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.056355555555555555, | |
| "grad_norm": 99.8827133178711, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4788, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.05644444444444444, | |
| "grad_norm": 157.86849975585938, | |
| "learning_rate": 3e-06, | |
| "loss": -35.8463, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.05653333333333333, | |
| "grad_norm": 109.21646118164062, | |
| "learning_rate": 3e-06, | |
| "loss": -25.4238, | |
| "step": 636 | |
| }, | |
| { | |
| "completion_length": 247.5416717529297, | |
| "epoch": 0.05662222222222222, | |
| "grad_norm": 107.59577941894531, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5406, | |
| "reward": 0.71875, | |
| "reward_std": 0.11004260182380676, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.0416666679084301, | |
| "step": 637, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.05671111111111111, | |
| "grad_norm": 92.30776977539062, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1641, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.0568, | |
| "grad_norm": 41.486106872558594, | |
| "learning_rate": 3e-06, | |
| "loss": 4.5319, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.05688888888888889, | |
| "grad_norm": 50.142147064208984, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4783, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.05697777777777778, | |
| "grad_norm": 41.67461395263672, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8438, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.05706666666666667, | |
| "grad_norm": 51.555999755859375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3726, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.05715555555555556, | |
| "grad_norm": 56.56801223754883, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3059, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.057244444444444445, | |
| "grad_norm": 57.52075958251953, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0942, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.05733333333333333, | |
| "grad_norm": 51.82474136352539, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4151, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.05742222222222222, | |
| "grad_norm": 45.45164108276367, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6204, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.05751111111111111, | |
| "grad_norm": 43.34312057495117, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6824, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.0576, | |
| "grad_norm": 88.9341812133789, | |
| "learning_rate": 3e-06, | |
| "loss": -0.345, | |
| "step": 648 | |
| }, | |
| { | |
| "completion_length": 246.81250762939453, | |
| "epoch": 0.05768888888888889, | |
| "grad_norm": 91.70360565185547, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4258, | |
| "reward": 1.8541666865348816, | |
| "reward_std": 0.3332235887646675, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.1666666567325592, | |
| "step": 649, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.057777777777777775, | |
| "grad_norm": 117.84858703613281, | |
| "learning_rate": 3e-06, | |
| "loss": -24.4443, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.057866666666666663, | |
| "grad_norm": 94.53849792480469, | |
| "learning_rate": 3e-06, | |
| "loss": -21.691, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.05795555555555556, | |
| "grad_norm": 97.39710998535156, | |
| "learning_rate": 3e-06, | |
| "loss": -24.1024, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.05804444444444445, | |
| "grad_norm": 90.82528686523438, | |
| "learning_rate": 3e-06, | |
| "loss": -23.0164, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.058133333333333335, | |
| "grad_norm": 77.49068450927734, | |
| "learning_rate": 3e-06, | |
| "loss": -22.6643, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.058222222222222224, | |
| "grad_norm": 87.06861114501953, | |
| "learning_rate": 3e-06, | |
| "loss": -22.1806, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.05831111111111111, | |
| "grad_norm": 116.24286651611328, | |
| "learning_rate": 3e-06, | |
| "loss": -25.0616, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.0584, | |
| "grad_norm": 80.90653228759766, | |
| "learning_rate": 3e-06, | |
| "loss": -23.1949, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.05848888888888889, | |
| "grad_norm": 122.82756042480469, | |
| "learning_rate": 3e-06, | |
| "loss": -25.4261, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.05857777777777778, | |
| "grad_norm": 87.51302337646484, | |
| "learning_rate": 3e-06, | |
| "loss": -24.0548, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.058666666666666666, | |
| "grad_norm": 85.40345764160156, | |
| "learning_rate": 3e-06, | |
| "loss": -23.4326, | |
| "step": 660 | |
| }, | |
| { | |
| "completion_length": 250.9375, | |
| "epoch": 0.058755555555555554, | |
| "grad_norm": 40.29450607299805, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1824, | |
| "reward": 0.8541666865348816, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.2916666567325592, | |
| "step": 661, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.05884444444444444, | |
| "grad_norm": 53.10469436645508, | |
| "learning_rate": 3e-06, | |
| "loss": 5.2732, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.05893333333333333, | |
| "grad_norm": 53.52404022216797, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7305, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.05902222222222222, | |
| "grad_norm": 43.00156021118164, | |
| "learning_rate": 3e-06, | |
| "loss": 10.5135, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.059111111111111114, | |
| "grad_norm": 51.94622802734375, | |
| "learning_rate": 3e-06, | |
| "loss": 8.7434, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.0592, | |
| "grad_norm": 45.08756637573242, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9107, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.05928888888888889, | |
| "grad_norm": 38.80097961425781, | |
| "learning_rate": 3e-06, | |
| "loss": 9.0445, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.05937777777777778, | |
| "grad_norm": 50.45252227783203, | |
| "learning_rate": 3e-06, | |
| "loss": 4.2955, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.05946666666666667, | |
| "grad_norm": 42.282501220703125, | |
| "learning_rate": 3e-06, | |
| "loss": 8.7618, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.059555555555555556, | |
| "grad_norm": 40.22513961791992, | |
| "learning_rate": 3e-06, | |
| "loss": 9.5891, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.059644444444444444, | |
| "grad_norm": 55.79698181152344, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7516, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.05973333333333333, | |
| "grad_norm": 41.462181091308594, | |
| "learning_rate": 3e-06, | |
| "loss": 10.17, | |
| "step": 672 | |
| }, | |
| { | |
| "completion_length": 238.4375, | |
| "epoch": 0.05982222222222222, | |
| "grad_norm": 95.73463439941406, | |
| "learning_rate": 3e-06, | |
| "loss": 14.012, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.3410547822713852, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7083333134651184, | |
| "step": 673, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.05991111111111111, | |
| "grad_norm": 92.55806732177734, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0593, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 90.97936248779297, | |
| "learning_rate": 3e-06, | |
| "loss": 6.1863, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.060088888888888886, | |
| "grad_norm": 82.84268951416016, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7791, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.060177777777777774, | |
| "grad_norm": 93.55880737304688, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1706, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.06026666666666667, | |
| "grad_norm": 143.54983520507812, | |
| "learning_rate": 3e-06, | |
| "loss": 5.2435, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.06035555555555556, | |
| "grad_norm": 103.67829132080078, | |
| "learning_rate": 3e-06, | |
| "loss": 12.2804, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.060444444444444446, | |
| "grad_norm": 94.47793579101562, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8252, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.060533333333333335, | |
| "grad_norm": 87.38697814941406, | |
| "learning_rate": 3e-06, | |
| "loss": 4.5993, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.06062222222222222, | |
| "grad_norm": 79.12389373779297, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8813, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.06071111111111111, | |
| "grad_norm": 97.0263900756836, | |
| "learning_rate": 3e-06, | |
| "loss": -0.474, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.0608, | |
| "grad_norm": 183.30641174316406, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9729, | |
| "step": 684 | |
| }, | |
| { | |
| "completion_length": 255.4166717529297, | |
| "epoch": 0.06088888888888889, | |
| "grad_norm": 119.54466247558594, | |
| "learning_rate": 3e-06, | |
| "loss": -13.0228, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.505022794008255, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.708333358168602, | |
| "step": 685, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.06097777777777778, | |
| "grad_norm": 121.260986328125, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3608, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.061066666666666665, | |
| "grad_norm": 107.9063720703125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.885, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.06115555555555555, | |
| "grad_norm": 122.71879577636719, | |
| "learning_rate": 3e-06, | |
| "loss": -9.4339, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.06124444444444444, | |
| "grad_norm": 98.11092376708984, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9372, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.06133333333333333, | |
| "grad_norm": 106.66928100585938, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2675, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.061422222222222225, | |
| "grad_norm": 117.10845947265625, | |
| "learning_rate": 3e-06, | |
| "loss": -13.9389, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.061511111111111114, | |
| "grad_norm": 123.24708557128906, | |
| "learning_rate": 3e-06, | |
| "loss": -9.064, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.0616, | |
| "grad_norm": 108.9686050415039, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3708, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.06168888888888889, | |
| "grad_norm": 120.98512268066406, | |
| "learning_rate": 3e-06, | |
| "loss": -10.9314, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.06177777777777778, | |
| "grad_norm": 96.25732421875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.4401, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.06186666666666667, | |
| "grad_norm": 107.97279357910156, | |
| "learning_rate": 3e-06, | |
| "loss": 0.1328, | |
| "step": 696 | |
| }, | |
| { | |
| "completion_length": 242.4166717529297, | |
| "epoch": 0.061955555555555555, | |
| "grad_norm": 77.24815368652344, | |
| "learning_rate": 3e-06, | |
| "loss": 21.7633, | |
| "reward": 1.6875, | |
| "reward_std": 0.3680921494960785, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.0, | |
| "step": 697, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.062044444444444444, | |
| "grad_norm": 90.61959075927734, | |
| "learning_rate": 3e-06, | |
| "loss": 21.3004, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.06213333333333333, | |
| "grad_norm": 79.43978881835938, | |
| "learning_rate": 3e-06, | |
| "loss": 22.885, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.06222222222222222, | |
| "grad_norm": 88.06346130371094, | |
| "learning_rate": 3e-06, | |
| "loss": 16.6794, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.06231111111111111, | |
| "grad_norm": 215.39535522460938, | |
| "learning_rate": 3e-06, | |
| "loss": 18.8777, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.0624, | |
| "grad_norm": 92.26004791259766, | |
| "learning_rate": 3e-06, | |
| "loss": 11.0055, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.062488888888888885, | |
| "grad_norm": 77.40907287597656, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7747, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.06257777777777777, | |
| "grad_norm": 100.23749542236328, | |
| "learning_rate": 3e-06, | |
| "loss": 20.3858, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.06266666666666666, | |
| "grad_norm": 75.75386047363281, | |
| "learning_rate": 3e-06, | |
| "loss": 22.1331, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.06275555555555555, | |
| "grad_norm": 88.5040054321289, | |
| "learning_rate": 3e-06, | |
| "loss": 15.6211, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.06284444444444444, | |
| "grad_norm": 83.19754028320312, | |
| "learning_rate": 3e-06, | |
| "loss": 17.9444, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.06293333333333333, | |
| "grad_norm": 88.55027770996094, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7909, | |
| "step": 708 | |
| }, | |
| { | |
| "completion_length": 253.8541717529297, | |
| "epoch": 0.06302222222222222, | |
| "grad_norm": 106.40866088867188, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6061, | |
| "reward": 1.2708333730697632, | |
| "reward_std": 0.38547582924366, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5833333432674408, | |
| "step": 709, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.06311111111111112, | |
| "grad_norm": 123.38434600830078, | |
| "learning_rate": 3e-06, | |
| "loss": -9.3301, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.0632, | |
| "grad_norm": 101.67454528808594, | |
| "learning_rate": 3e-06, | |
| "loss": -7.1273, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.0632888888888889, | |
| "grad_norm": 112.82793426513672, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8195, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.06337777777777778, | |
| "grad_norm": 108.99236297607422, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3741, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.06346666666666667, | |
| "grad_norm": 106.9615478515625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.792, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.06355555555555556, | |
| "grad_norm": 94.91867065429688, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0819, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.06364444444444445, | |
| "grad_norm": 114.82243347167969, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4017, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.06373333333333334, | |
| "grad_norm": 104.33937072753906, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7339, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.06382222222222222, | |
| "grad_norm": 136.31576538085938, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1719, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.06391111111111111, | |
| "grad_norm": 99.78816223144531, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7219, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 120.37998962402344, | |
| "learning_rate": 3e-06, | |
| "loss": -10.5969, | |
| "step": 720 | |
| }, | |
| { | |
| "completion_length": 244.0416717529297, | |
| "epoch": 0.06408888888888889, | |
| "grad_norm": 101.32681274414062, | |
| "learning_rate": 3e-06, | |
| "loss": 2.1527, | |
| "reward": 1.0, | |
| "reward_std": 0.4417443424463272, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6666666865348816, | |
| "rewards/correctness_reward_func_math": 0.3333333246409893, | |
| "step": 721, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.06417777777777778, | |
| "grad_norm": 113.38105773925781, | |
| "learning_rate": 3e-06, | |
| "loss": 11.181, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.06426666666666667, | |
| "grad_norm": 114.85003662109375, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1413, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.06435555555555555, | |
| "grad_norm": 133.62515258789062, | |
| "learning_rate": 3e-06, | |
| "loss": 9.3709, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.06444444444444444, | |
| "grad_norm": 113.68856048583984, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0918, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.06453333333333333, | |
| "grad_norm": 161.93836975097656, | |
| "learning_rate": 3e-06, | |
| "loss": -1.9418, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.06462222222222222, | |
| "grad_norm": 100.46546173095703, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5341, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.06471111111111111, | |
| "grad_norm": 141.3654022216797, | |
| "learning_rate": 3e-06, | |
| "loss": 9.9107, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.0648, | |
| "grad_norm": 131.8665771484375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.674, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.06488888888888888, | |
| "grad_norm": 128.03195190429688, | |
| "learning_rate": 3e-06, | |
| "loss": 6.8735, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06497777777777777, | |
| "grad_norm": 117.97486114501953, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1859, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.06506666666666666, | |
| "grad_norm": 158.5392303466797, | |
| "learning_rate": 3e-06, | |
| "loss": -4.222, | |
| "step": 732 | |
| }, | |
| { | |
| "completion_length": 249.08334350585938, | |
| "epoch": 0.06515555555555555, | |
| "grad_norm": 122.15642547607422, | |
| "learning_rate": 3e-06, | |
| "loss": 5.173, | |
| "reward": 1.1770833730697632, | |
| "reward_std": 0.2915456295013428, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6354166567325592, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 733, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.06524444444444444, | |
| "grad_norm": 122.5665054321289, | |
| "learning_rate": 3e-06, | |
| "loss": -10.2692, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.06533333333333333, | |
| "grad_norm": 95.55500030517578, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3816, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.06542222222222223, | |
| "grad_norm": 111.13971710205078, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5502, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.06551111111111112, | |
| "grad_norm": 98.50959014892578, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0919, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.0656, | |
| "grad_norm": 82.98762512207031, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9156, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.0656888888888889, | |
| "grad_norm": 114.31904602050781, | |
| "learning_rate": 3e-06, | |
| "loss": 4.2146, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.06577777777777778, | |
| "grad_norm": 109.786376953125, | |
| "learning_rate": 3e-06, | |
| "loss": -11.7776, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06586666666666667, | |
| "grad_norm": 95.00721740722656, | |
| "learning_rate": 3e-06, | |
| "loss": -5.7331, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.06595555555555556, | |
| "grad_norm": 87.01516723632812, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0034, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.06604444444444445, | |
| "grad_norm": 101.2181167602539, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7069, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.06613333333333334, | |
| "grad_norm": 79.3046875, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6759, | |
| "step": 744 | |
| }, | |
| { | |
| "completion_length": 252.93750762939453, | |
| "epoch": 0.06622222222222222, | |
| "grad_norm": 101.21440887451172, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4358, | |
| "reward": 1.354166716337204, | |
| "reward_std": 0.4736091196537018, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6458333432674408, | |
| "rewards/correctness_reward_func_math": 0.7083333358168602, | |
| "step": 745, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.06631111111111111, | |
| "grad_norm": 133.58985900878906, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2243, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.0664, | |
| "grad_norm": 137.97776794433594, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7561, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.06648888888888889, | |
| "grad_norm": 97.12255859375, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8029, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.06657777777777778, | |
| "grad_norm": 133.5359344482422, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9185, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 165.00294494628906, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6118, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06675555555555555, | |
| "grad_norm": 147.9979705810547, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9697, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.06684444444444444, | |
| "grad_norm": 119.22462463378906, | |
| "learning_rate": 3e-06, | |
| "loss": -0.882, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.06693333333333333, | |
| "grad_norm": 144.49305725097656, | |
| "learning_rate": 3e-06, | |
| "loss": 2.487, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.06702222222222222, | |
| "grad_norm": 96.12986755371094, | |
| "learning_rate": 3e-06, | |
| "loss": 1.0482, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.06711111111111111, | |
| "grad_norm": 117.53173065185547, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1922, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.0672, | |
| "grad_norm": 99.70003509521484, | |
| "learning_rate": 3e-06, | |
| "loss": 4.014, | |
| "step": 756 | |
| }, | |
| { | |
| "completion_length": 252.9166717529297, | |
| "epoch": 0.06728888888888888, | |
| "grad_norm": 107.6641616821289, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2275, | |
| "reward": 1.7812500596046448, | |
| "reward_std": 0.38577648997306824, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 757, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.06737777777777777, | |
| "grad_norm": 87.34847259521484, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2976, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.06746666666666666, | |
| "grad_norm": 114.33875274658203, | |
| "learning_rate": 3e-06, | |
| "loss": -4.307, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.06755555555555555, | |
| "grad_norm": 106.53048706054688, | |
| "learning_rate": 3e-06, | |
| "loss": -1.13, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06764444444444444, | |
| "grad_norm": 120.27633666992188, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2187, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.06773333333333334, | |
| "grad_norm": 96.025390625, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3329, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.06782222222222223, | |
| "grad_norm": 115.00994110107422, | |
| "learning_rate": 3e-06, | |
| "loss": -7.6432, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.06791111111111112, | |
| "grad_norm": 87.76792907714844, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6143, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 113.94709777832031, | |
| "learning_rate": 3e-06, | |
| "loss": -5.7334, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.0680888888888889, | |
| "grad_norm": 111.59996032714844, | |
| "learning_rate": 3e-06, | |
| "loss": -2.659, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.06817777777777778, | |
| "grad_norm": 129.63861083984375, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0582, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.06826666666666667, | |
| "grad_norm": 93.2347412109375, | |
| "learning_rate": 3e-06, | |
| "loss": -1.0268, | |
| "step": 768 | |
| }, | |
| { | |
| "completion_length": 251.52083587646484, | |
| "epoch": 0.06835555555555556, | |
| "grad_norm": 184.61167907714844, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0221, | |
| "reward": 1.4166666865348816, | |
| "reward_std": 0.6262910515069962, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.6666666567325592, | |
| "step": 769, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.06844444444444445, | |
| "grad_norm": 137.45404052734375, | |
| "learning_rate": 3e-06, | |
| "loss": 7.8168, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06853333333333333, | |
| "grad_norm": 202.94410705566406, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1514, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.06862222222222222, | |
| "grad_norm": 161.6815948486328, | |
| "learning_rate": 3e-06, | |
| "loss": 8.282, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.06871111111111111, | |
| "grad_norm": 123.37694549560547, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5652, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.0688, | |
| "grad_norm": 146.2295379638672, | |
| "learning_rate": 3e-06, | |
| "loss": 9.5204, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.06888888888888889, | |
| "grad_norm": 145.88613891601562, | |
| "learning_rate": 3e-06, | |
| "loss": 4.6675, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.06897777777777778, | |
| "grad_norm": 135.99313354492188, | |
| "learning_rate": 3e-06, | |
| "loss": 6.1798, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.06906666666666667, | |
| "grad_norm": 134.66729736328125, | |
| "learning_rate": 3e-06, | |
| "loss": 6.9994, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.06915555555555555, | |
| "grad_norm": 173.39735412597656, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3314, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.06924444444444444, | |
| "grad_norm": 136.74331665039062, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5412, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.06933333333333333, | |
| "grad_norm": 142.06529235839844, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5, | |
| "step": 780 | |
| }, | |
| { | |
| "completion_length": 236.14583587646484, | |
| "epoch": 0.06942222222222222, | |
| "grad_norm": 92.94062805175781, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4998, | |
| "reward": 0.8854166865348816, | |
| "reward_std": 0.3922351598739624, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.71875, | |
| "rewards/correctness_reward_func_math": 0.1666666679084301, | |
| "step": 781, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.0695111111111111, | |
| "grad_norm": 84.2685546875, | |
| "learning_rate": 3e-06, | |
| "loss": 20.3539, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.0696, | |
| "grad_norm": 79.0595932006836, | |
| "learning_rate": 3e-06, | |
| "loss": 16.2649, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.06968888888888888, | |
| "grad_norm": 151.0120086669922, | |
| "learning_rate": 3e-06, | |
| "loss": 13.9466, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.06977777777777777, | |
| "grad_norm": 84.83601379394531, | |
| "learning_rate": 3e-06, | |
| "loss": 13.7201, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.06986666666666666, | |
| "grad_norm": 149.62045288085938, | |
| "learning_rate": 3e-06, | |
| "loss": 13.032, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.06995555555555556, | |
| "grad_norm": 95.56779479980469, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1661, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.07004444444444445, | |
| "grad_norm": 89.89787292480469, | |
| "learning_rate": 3e-06, | |
| "loss": 19.6015, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.07013333333333334, | |
| "grad_norm": 78.87327575683594, | |
| "learning_rate": 3e-06, | |
| "loss": 15.0965, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.07022222222222223, | |
| "grad_norm": 88.94290161132812, | |
| "learning_rate": 3e-06, | |
| "loss": 12.3248, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.07031111111111112, | |
| "grad_norm": 84.62553405761719, | |
| "learning_rate": 3e-06, | |
| "loss": 12.571, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.0704, | |
| "grad_norm": 102.37629699707031, | |
| "learning_rate": 3e-06, | |
| "loss": 11.5955, | |
| "step": 792 | |
| }, | |
| { | |
| "completion_length": 245.95834350585938, | |
| "epoch": 0.07048888888888889, | |
| "grad_norm": 87.68032836914062, | |
| "learning_rate": 3e-06, | |
| "loss": -2.896, | |
| "reward": 0.979166716337204, | |
| "reward_std": 0.3332235962152481, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.416666679084301, | |
| "step": 793, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.07057777777777778, | |
| "grad_norm": 102.35977935791016, | |
| "learning_rate": 3e-06, | |
| "loss": -14.1397, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.07066666666666667, | |
| "grad_norm": 121.52274322509766, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6379, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.07075555555555556, | |
| "grad_norm": 123.77271270751953, | |
| "learning_rate": 3e-06, | |
| "loss": -15.954, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.07084444444444445, | |
| "grad_norm": 115.74909210205078, | |
| "learning_rate": 3e-06, | |
| "loss": -19.257, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.07093333333333333, | |
| "grad_norm": 171.18011474609375, | |
| "learning_rate": 3e-06, | |
| "loss": -15.1334, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.07102222222222222, | |
| "grad_norm": 92.46512603759766, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7242, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.07111111111111111, | |
| "grad_norm": 104.15264129638672, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9942, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0712, | |
| "grad_norm": 120.8930892944336, | |
| "learning_rate": 3e-06, | |
| "loss": -22.0188, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.07128888888888889, | |
| "grad_norm": 112.13275146484375, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6709, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.07137777777777778, | |
| "grad_norm": 136.23388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6627, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.07146666666666666, | |
| "grad_norm": 124.1773681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -17.0962, | |
| "step": 804 | |
| }, | |
| { | |
| "completion_length": 232.89583587646484, | |
| "epoch": 0.07155555555555555, | |
| "grad_norm": 91.88973236083984, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8492, | |
| "reward": 1.7291666865348816, | |
| "reward_std": 0.47015421837568283, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 805, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.07164444444444444, | |
| "grad_norm": 107.12287139892578, | |
| "learning_rate": 3e-06, | |
| "loss": -23.4332, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.07173333333333333, | |
| "grad_norm": 104.5981674194336, | |
| "learning_rate": 3e-06, | |
| "loss": -11.8278, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.07182222222222222, | |
| "grad_norm": 102.6692886352539, | |
| "learning_rate": 3e-06, | |
| "loss": -20.165, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.0719111111111111, | |
| "grad_norm": 88.46080017089844, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8484, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 124.10685729980469, | |
| "learning_rate": 3e-06, | |
| "loss": -20.237, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.07208888888888888, | |
| "grad_norm": 95.68196868896484, | |
| "learning_rate": 3e-06, | |
| "loss": -9.2521, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.07217777777777777, | |
| "grad_norm": 95.62832641601562, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6333, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.07226666666666667, | |
| "grad_norm": 104.2110366821289, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2533, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.07235555555555556, | |
| "grad_norm": 114.39372253417969, | |
| "learning_rate": 3e-06, | |
| "loss": -22.2888, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.07244444444444445, | |
| "grad_norm": 112.07288360595703, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7028, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.07253333333333334, | |
| "grad_norm": 138.86337280273438, | |
| "learning_rate": 3e-06, | |
| "loss": -23.0688, | |
| "step": 816 | |
| }, | |
| { | |
| "completion_length": 249.3541717529297, | |
| "epoch": 0.07262222222222223, | |
| "grad_norm": 106.32572937011719, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5611, | |
| "reward": 1.291666716337204, | |
| "reward_std": 0.37455084919929504, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6666666865348816, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 817, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.07271111111111112, | |
| "grad_norm": 119.1928482055664, | |
| "learning_rate": 3e-06, | |
| "loss": 6.5715, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.0728, | |
| "grad_norm": 89.53767395019531, | |
| "learning_rate": 3e-06, | |
| "loss": 9.9694, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.07288888888888889, | |
| "grad_norm": 97.2778091430664, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1343, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.07297777777777778, | |
| "grad_norm": 99.85116577148438, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7739, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.07306666666666667, | |
| "grad_norm": 107.6137924194336, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6852, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.07315555555555556, | |
| "grad_norm": 113.21214294433594, | |
| "learning_rate": 3e-06, | |
| "loss": 1.024, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.07324444444444445, | |
| "grad_norm": 130.27040100097656, | |
| "learning_rate": 3e-06, | |
| "loss": 4.4904, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.07333333333333333, | |
| "grad_norm": 87.94723510742188, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7117, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.07342222222222222, | |
| "grad_norm": 102.87310791015625, | |
| "learning_rate": 3e-06, | |
| "loss": 5.3932, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.07351111111111111, | |
| "grad_norm": 97.53043365478516, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1637, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.0736, | |
| "grad_norm": 126.4931640625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7025, | |
| "step": 828 | |
| }, | |
| { | |
| "completion_length": 240.75, | |
| "epoch": 0.07368888888888889, | |
| "grad_norm": 88.22557067871094, | |
| "learning_rate": 3e-06, | |
| "loss": 22.5183, | |
| "reward": 1.1354166865348816, | |
| "reward_std": 0.35377833247184753, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.59375, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 829, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.07377777777777778, | |
| "grad_norm": 86.47975158691406, | |
| "learning_rate": 3e-06, | |
| "loss": 24.5943, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.07386666666666666, | |
| "grad_norm": 93.10693359375, | |
| "learning_rate": 3e-06, | |
| "loss": 27.2039, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.07395555555555555, | |
| "grad_norm": 96.74742889404297, | |
| "learning_rate": 3e-06, | |
| "loss": 32.9558, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.07404444444444444, | |
| "grad_norm": 97.9085693359375, | |
| "learning_rate": 3e-06, | |
| "loss": 25.7304, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.07413333333333333, | |
| "grad_norm": 95.80497741699219, | |
| "learning_rate": 3e-06, | |
| "loss": 28.8338, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.07422222222222222, | |
| "grad_norm": 82.9104995727539, | |
| "learning_rate": 3e-06, | |
| "loss": 20.9569, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.0743111111111111, | |
| "grad_norm": 332.6025695800781, | |
| "learning_rate": 3e-06, | |
| "loss": 22.7693, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.0744, | |
| "grad_norm": 97.00851440429688, | |
| "learning_rate": 3e-06, | |
| "loss": 24.6048, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.07448888888888888, | |
| "grad_norm": 90.94817352294922, | |
| "learning_rate": 3e-06, | |
| "loss": 30.2657, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.07457777777777778, | |
| "grad_norm": 91.87737274169922, | |
| "learning_rate": 3e-06, | |
| "loss": 23.8124, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 100.71826934814453, | |
| "learning_rate": 3e-06, | |
| "loss": 26.5346, | |
| "step": 840 | |
| }, | |
| { | |
| "completion_length": 244.3125, | |
| "epoch": 0.07475555555555556, | |
| "grad_norm": 454.38482666015625, | |
| "learning_rate": 3e-06, | |
| "loss": -16.9865, | |
| "reward": 1.2812500596046448, | |
| "reward_std": 0.3782803416252136, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.6666666492819786, | |
| "step": 841, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.07484444444444445, | |
| "grad_norm": 102.08020782470703, | |
| "learning_rate": 3e-06, | |
| "loss": -18.5601, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.07493333333333334, | |
| "grad_norm": 101.7997055053711, | |
| "learning_rate": 3e-06, | |
| "loss": -23.9473, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.07502222222222223, | |
| "grad_norm": 100.2668685913086, | |
| "learning_rate": 3e-06, | |
| "loss": -26.2402, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.07511111111111111, | |
| "grad_norm": 119.95198059082031, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5011, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.0752, | |
| "grad_norm": 101.83861541748047, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7265, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.07528888888888889, | |
| "grad_norm": 106.50312042236328, | |
| "learning_rate": 3e-06, | |
| "loss": -18.8046, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.07537777777777778, | |
| "grad_norm": 114.58135986328125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.037, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.07546666666666667, | |
| "grad_norm": 121.03673553466797, | |
| "learning_rate": 3e-06, | |
| "loss": -25.817, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.07555555555555556, | |
| "grad_norm": 97.15817260742188, | |
| "learning_rate": 3e-06, | |
| "loss": -28.7689, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07564444444444444, | |
| "grad_norm": 101.58541107177734, | |
| "learning_rate": 3e-06, | |
| "loss": -24.6109, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.07573333333333333, | |
| "grad_norm": 100.84017181396484, | |
| "learning_rate": 3e-06, | |
| "loss": -16.8195, | |
| "step": 852 | |
| }, | |
| { | |
| "completion_length": 247.4166717529297, | |
| "epoch": 0.07582222222222222, | |
| "grad_norm": 60.974788665771484, | |
| "learning_rate": 3e-06, | |
| "loss": 1.297, | |
| "reward": 1.2708333432674408, | |
| "reward_std": 0.12909945845603943, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5833333358168602, | |
| "step": 853, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.07591111111111111, | |
| "grad_norm": 58.498374938964844, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5823, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 51.417320251464844, | |
| "learning_rate": 3e-06, | |
| "loss": -2.8582, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.07608888888888889, | |
| "grad_norm": 52.74491882324219, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5933, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.07617777777777777, | |
| "grad_norm": 54.02571487426758, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1777, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.07626666666666666, | |
| "grad_norm": 44.29707717895508, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0581, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.07635555555555555, | |
| "grad_norm": 63.4464111328125, | |
| "learning_rate": 3e-06, | |
| "loss": 0.981, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.07644444444444444, | |
| "grad_norm": 58.98569869995117, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3316, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.07653333333333333, | |
| "grad_norm": 54.73743438720703, | |
| "learning_rate": 3e-06, | |
| "loss": -3.799, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.07662222222222222, | |
| "grad_norm": 55.15388107299805, | |
| "learning_rate": 3e-06, | |
| "loss": -4.4757, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.0767111111111111, | |
| "grad_norm": 61.510887145996094, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6125, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.0768, | |
| "grad_norm": 46.19833755493164, | |
| "learning_rate": 3e-06, | |
| "loss": -0.7855, | |
| "step": 864 | |
| }, | |
| { | |
| "completion_length": 251.3125, | |
| "epoch": 0.0768888888888889, | |
| "grad_norm": 111.38273620605469, | |
| "learning_rate": 3e-06, | |
| "loss": 14.4732, | |
| "reward": 1.5208333730697632, | |
| "reward_std": 0.4937378317117691, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7291666567325592, | |
| "rewards/correctness_reward_func_math": 0.7916666567325592, | |
| "step": 865, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.07697777777777778, | |
| "grad_norm": 112.39920043945312, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7967, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.07706666666666667, | |
| "grad_norm": 106.71125793457031, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1063, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.07715555555555556, | |
| "grad_norm": 129.4515838623047, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7262, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.07724444444444445, | |
| "grad_norm": 109.67815399169922, | |
| "learning_rate": 3e-06, | |
| "loss": 0.1256, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.07733333333333334, | |
| "grad_norm": 116.57035827636719, | |
| "learning_rate": 3e-06, | |
| "loss": -2.946, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.07742222222222223, | |
| "grad_norm": 122.09374237060547, | |
| "learning_rate": 3e-06, | |
| "loss": 13.0142, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.07751111111111111, | |
| "grad_norm": 108.0525894165039, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4968, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.0776, | |
| "grad_norm": 108.2818374633789, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8286, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.07768888888888889, | |
| "grad_norm": 139.2396697998047, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0471, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.07777777777777778, | |
| "grad_norm": 114.9443588256836, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3382, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.07786666666666667, | |
| "grad_norm": 128.51272583007812, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8307, | |
| "step": 876 | |
| }, | |
| { | |
| "completion_length": 252.50000762939453, | |
| "epoch": 0.07795555555555556, | |
| "grad_norm": 126.4562759399414, | |
| "learning_rate": 3e-06, | |
| "loss": 0.751, | |
| "reward": 1.0729166865348816, | |
| "reward_std": 0.22548970580101013, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.4583333283662796, | |
| "step": 877, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.07804444444444444, | |
| "grad_norm": 82.09607696533203, | |
| "learning_rate": 3e-06, | |
| "loss": 0.195, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.07813333333333333, | |
| "grad_norm": 74.75113677978516, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5817, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.07822222222222222, | |
| "grad_norm": 115.73063659667969, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3706, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07831111111111111, | |
| "grad_norm": 70.39916229248047, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2191, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.0784, | |
| "grad_norm": 103.28494262695312, | |
| "learning_rate": 3e-06, | |
| "loss": 1.0954, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.07848888888888889, | |
| "grad_norm": 100.23104858398438, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0428, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.07857777777777777, | |
| "grad_norm": 90.33434295654297, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8422, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.07866666666666666, | |
| "grad_norm": 99.41636657714844, | |
| "learning_rate": 3e-06, | |
| "loss": -2.558, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.07875555555555555, | |
| "grad_norm": 99.72211456298828, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9057, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.07884444444444444, | |
| "grad_norm": 73.4344711303711, | |
| "learning_rate": 3e-06, | |
| "loss": -4.09, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.07893333333333333, | |
| "grad_norm": 98.25971221923828, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4152, | |
| "step": 888 | |
| }, | |
| { | |
| "completion_length": 249.2916717529297, | |
| "epoch": 0.07902222222222222, | |
| "grad_norm": 165.06918334960938, | |
| "learning_rate": 3e-06, | |
| "loss": 25.8684, | |
| "reward": 1.4270833730697632, | |
| "reward_std": 0.5354157984256744, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 889, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.0791111111111111, | |
| "grad_norm": 132.98858642578125, | |
| "learning_rate": 3e-06, | |
| "loss": 19.4902, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.0792, | |
| "grad_norm": 122.3958969116211, | |
| "learning_rate": 3e-06, | |
| "loss": 32.6103, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.0792888888888889, | |
| "grad_norm": 118.38285064697266, | |
| "learning_rate": 3e-06, | |
| "loss": 32.6835, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.07937777777777778, | |
| "grad_norm": 126.4738540649414, | |
| "learning_rate": 3e-06, | |
| "loss": 30.5181, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.07946666666666667, | |
| "grad_norm": 128.20831298828125, | |
| "learning_rate": 3e-06, | |
| "loss": 33.4332, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.07955555555555556, | |
| "grad_norm": 152.9354248046875, | |
| "learning_rate": 3e-06, | |
| "loss": 23.7035, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.07964444444444445, | |
| "grad_norm": 130.27053833007812, | |
| "learning_rate": 3e-06, | |
| "loss": 16.7548, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.07973333333333334, | |
| "grad_norm": 127.17219543457031, | |
| "learning_rate": 3e-06, | |
| "loss": 30.7848, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.07982222222222222, | |
| "grad_norm": 118.670654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 30.7772, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.07991111111111111, | |
| "grad_norm": 120.19160461425781, | |
| "learning_rate": 3e-06, | |
| "loss": 27.401, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 137.2371063232422, | |
| "learning_rate": 3e-06, | |
| "loss": 30.9334, | |
| "step": 900 | |
| }, | |
| { | |
| "completion_length": 255.89583587646484, | |
| "epoch": 0.08008888888888889, | |
| "grad_norm": 88.54483032226562, | |
| "learning_rate": 3e-06, | |
| "loss": 3.6467, | |
| "reward": 1.3437500596046448, | |
| "reward_std": 0.30770808458328247, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.6666666567325592, | |
| "step": 901, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.08017777777777778, | |
| "grad_norm": 87.48735046386719, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8615, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.08026666666666667, | |
| "grad_norm": 86.97764587402344, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8386, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.08035555555555556, | |
| "grad_norm": 105.64205932617188, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9129, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.08044444444444444, | |
| "grad_norm": 80.34683227539062, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4293, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.08053333333333333, | |
| "grad_norm": 91.16341400146484, | |
| "learning_rate": 3e-06, | |
| "loss": 2.803, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.08062222222222222, | |
| "grad_norm": 96.49407196044922, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4431, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.08071111111111111, | |
| "grad_norm": 84.40055084228516, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5478, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.0808, | |
| "grad_norm": 78.79622650146484, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4422, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.08088888888888889, | |
| "grad_norm": 128.47531127929688, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1302, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.08097777777777777, | |
| "grad_norm": 79.19956970214844, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2354, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.08106666666666666, | |
| "grad_norm": 107.92975616455078, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3494, | |
| "step": 912 | |
| }, | |
| { | |
| "completion_length": 236.7916717529297, | |
| "epoch": 0.08115555555555555, | |
| "grad_norm": 72.56483459472656, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7678, | |
| "reward": 1.0, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 913, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.08124444444444444, | |
| "grad_norm": 84.60346984863281, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6215, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.08133333333333333, | |
| "grad_norm": 80.78106689453125, | |
| "learning_rate": 3e-06, | |
| "loss": 3.2587, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.08142222222222223, | |
| "grad_norm": 71.9332275390625, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7685, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.08151111111111112, | |
| "grad_norm": 98.66748046875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.589, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.0816, | |
| "grad_norm": 135.69175720214844, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5017, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.0816888888888889, | |
| "grad_norm": 73.60437774658203, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1495, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.08177777777777778, | |
| "grad_norm": 90.75928497314453, | |
| "learning_rate": 3e-06, | |
| "loss": -2.384, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.08186666666666667, | |
| "grad_norm": 82.05548095703125, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8112, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.08195555555555556, | |
| "grad_norm": 77.72819519042969, | |
| "learning_rate": 3e-06, | |
| "loss": -5.4085, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.08204444444444445, | |
| "grad_norm": 100.81270599365234, | |
| "learning_rate": 3e-06, | |
| "loss": -7.311, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.08213333333333334, | |
| "grad_norm": 86.93998718261719, | |
| "learning_rate": 3e-06, | |
| "loss": -7.627, | |
| "step": 924 | |
| }, | |
| { | |
| "completion_length": 244.2916717529297, | |
| "epoch": 0.08222222222222222, | |
| "grad_norm": 124.40040588378906, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5607, | |
| "reward": 1.7708333730697632, | |
| "reward_std": 0.5440726578235626, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7291666567325592, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 925, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.08231111111111111, | |
| "grad_norm": 100.88472747802734, | |
| "learning_rate": 3e-06, | |
| "loss": -11.1251, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.0824, | |
| "grad_norm": 139.0868377685547, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6789, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.08248888888888889, | |
| "grad_norm": 105.37358093261719, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7812, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.08257777777777778, | |
| "grad_norm": 122.72453308105469, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9917, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.08266666666666667, | |
| "grad_norm": 125.92015075683594, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2776, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.08275555555555555, | |
| "grad_norm": 122.41661834716797, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8417, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.08284444444444444, | |
| "grad_norm": 117.53387451171875, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1484, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.08293333333333333, | |
| "grad_norm": 126.8305892944336, | |
| "learning_rate": 3e-06, | |
| "loss": -7.6824, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.08302222222222222, | |
| "grad_norm": 111.0191421508789, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9524, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.08311111111111111, | |
| "grad_norm": 131.84397888183594, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9848, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.0832, | |
| "grad_norm": 124.30160522460938, | |
| "learning_rate": 3e-06, | |
| "loss": -5.5558, | |
| "step": 936 | |
| }, | |
| { | |
| "completion_length": 240.9791717529297, | |
| "epoch": 0.08328888888888888, | |
| "grad_norm": 137.87579345703125, | |
| "learning_rate": 3e-06, | |
| "loss": -18.5164, | |
| "reward": 1.1354166865348816, | |
| "reward_std": 0.4608011841773987, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.4583333432674408, | |
| "step": 937, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.08337777777777777, | |
| "grad_norm": 132.598388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -26.5032, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.08346666666666666, | |
| "grad_norm": 147.24671936035156, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2196, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.08355555555555555, | |
| "grad_norm": 140.57591247558594, | |
| "learning_rate": 3e-06, | |
| "loss": -19.0462, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.08364444444444444, | |
| "grad_norm": 124.26339721679688, | |
| "learning_rate": 3e-06, | |
| "loss": -22.6895, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.08373333333333334, | |
| "grad_norm": 126.45221710205078, | |
| "learning_rate": 3e-06, | |
| "loss": -24.6262, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.08382222222222223, | |
| "grad_norm": 135.6764373779297, | |
| "learning_rate": 3e-06, | |
| "loss": -20.8866, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.08391111111111112, | |
| "grad_norm": 139.4601287841797, | |
| "learning_rate": 3e-06, | |
| "loss": -28.928, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 173.5882568359375, | |
| "learning_rate": 3e-06, | |
| "loss": -23.599, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.0840888888888889, | |
| "grad_norm": 131.79933166503906, | |
| "learning_rate": 3e-06, | |
| "loss": -22.4616, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.08417777777777778, | |
| "grad_norm": 128.2574920654297, | |
| "learning_rate": 3e-06, | |
| "loss": -26.6084, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.08426666666666667, | |
| "grad_norm": 133.56024169921875, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0544, | |
| "step": 948 | |
| }, | |
| { | |
| "completion_length": 254.3541717529297, | |
| "epoch": 0.08435555555555556, | |
| "grad_norm": 113.98381805419922, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0708, | |
| "reward": 1.291666716337204, | |
| "reward_std": 0.4701542258262634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.541666679084301, | |
| "step": 949, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.08444444444444445, | |
| "grad_norm": 158.377197265625, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0861, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.08453333333333334, | |
| "grad_norm": 152.6723175048828, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1982, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.08462222222222222, | |
| "grad_norm": 122.41393280029297, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2863, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.08471111111111111, | |
| "grad_norm": 137.0810089111328, | |
| "learning_rate": 3e-06, | |
| "loss": -1.879, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.0848, | |
| "grad_norm": 122.0219497680664, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6058, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.08488888888888889, | |
| "grad_norm": 122.55841064453125, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6988, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.08497777777777778, | |
| "grad_norm": 144.296875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2706, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.08506666666666667, | |
| "grad_norm": 130.82684326171875, | |
| "learning_rate": 3e-06, | |
| "loss": 6.122, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.08515555555555555, | |
| "grad_norm": 121.61994934082031, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2647, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.08524444444444444, | |
| "grad_norm": 124.73589324951172, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8135, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 109.87874603271484, | |
| "learning_rate": 3e-06, | |
| "loss": -7.1554, | |
| "step": 960 | |
| }, | |
| { | |
| "completion_length": 244.50000762939453, | |
| "epoch": 0.08542222222222222, | |
| "grad_norm": 74.7403793334961, | |
| "learning_rate": 3e-06, | |
| "loss": 9.6961, | |
| "reward": 2.1666667461395264, | |
| "reward_std": 0.20412414520978928, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.4166666269302368, | |
| "step": 961, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.08551111111111111, | |
| "grad_norm": 68.04969024658203, | |
| "learning_rate": 3e-06, | |
| "loss": 12.6362, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.0856, | |
| "grad_norm": 69.76187133789062, | |
| "learning_rate": 3e-06, | |
| "loss": 13.3129, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.08568888888888888, | |
| "grad_norm": 81.5944595336914, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7628, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.08577777777777777, | |
| "grad_norm": 83.20171356201172, | |
| "learning_rate": 3e-06, | |
| "loss": 8.2983, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.08586666666666666, | |
| "grad_norm": 78.74623107910156, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5912, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.08595555555555555, | |
| "grad_norm": 65.22360229492188, | |
| "learning_rate": 3e-06, | |
| "loss": 8.6179, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.08604444444444445, | |
| "grad_norm": 67.84490966796875, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9563, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.08613333333333334, | |
| "grad_norm": 66.93883514404297, | |
| "learning_rate": 3e-06, | |
| "loss": 11.5826, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.08622222222222223, | |
| "grad_norm": 75.27574157714844, | |
| "learning_rate": 3e-06, | |
| "loss": 8.5011, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.08631111111111112, | |
| "grad_norm": 68.24022674560547, | |
| "learning_rate": 3e-06, | |
| "loss": 6.8511, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.0864, | |
| "grad_norm": 70.8918685913086, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4563, | |
| "step": 972 | |
| }, | |
| { | |
| "completion_length": 251.0416717529297, | |
| "epoch": 0.08648888888888889, | |
| "grad_norm": 130.6646270751953, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8085, | |
| "reward": 1.4479166865348816, | |
| "reward_std": 0.5305383503437042, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.708333358168602, | |
| "step": 973, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.08657777777777778, | |
| "grad_norm": 134.2749786376953, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4252, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.08666666666666667, | |
| "grad_norm": 137.93409729003906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3707, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.08675555555555556, | |
| "grad_norm": 121.03262329101562, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4106, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.08684444444444445, | |
| "grad_norm": 132.82774353027344, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6112, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.08693333333333333, | |
| "grad_norm": 163.79421997070312, | |
| "learning_rate": 3e-06, | |
| "loss": -9.213, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.08702222222222222, | |
| "grad_norm": 135.431396484375, | |
| "learning_rate": 3e-06, | |
| "loss": -9.618, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.08711111111111111, | |
| "grad_norm": 130.899658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0931, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.0872, | |
| "grad_norm": 133.3518524169922, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8637, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.08728888888888889, | |
| "grad_norm": 144.94068908691406, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6211, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.08737777777777778, | |
| "grad_norm": 141.72738647460938, | |
| "learning_rate": 3e-06, | |
| "loss": -9.573, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.08746666666666666, | |
| "grad_norm": 148.67123413085938, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8677, | |
| "step": 984 | |
| }, | |
| { | |
| "completion_length": 255.89583587646484, | |
| "epoch": 0.08755555555555555, | |
| "grad_norm": 148.83518981933594, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2086, | |
| "reward": 1.3958333432674408, | |
| "reward_std": 0.48216672986745834, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7291666865348816, | |
| "rewards/correctness_reward_func_math": 0.6666666865348816, | |
| "step": 985, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.08764444444444444, | |
| "grad_norm": 141.38470458984375, | |
| "learning_rate": 3e-06, | |
| "loss": -5.2298, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.08773333333333333, | |
| "grad_norm": 114.79426574707031, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6894, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.08782222222222222, | |
| "grad_norm": 137.67416381835938, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8801, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.0879111111111111, | |
| "grad_norm": 138.06517028808594, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6148, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 153.39608764648438, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5819, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08808888888888888, | |
| "grad_norm": 159.18431091308594, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8668, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.08817777777777777, | |
| "grad_norm": 141.29696655273438, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4773, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.08826666666666666, | |
| "grad_norm": 129.83058166503906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6063, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.08835555555555556, | |
| "grad_norm": 133.49261474609375, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5547, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.08844444444444445, | |
| "grad_norm": 125.11674499511719, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5547, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.08853333333333334, | |
| "grad_norm": 113.9607925415039, | |
| "learning_rate": 3e-06, | |
| "loss": -1.1078, | |
| "step": 996 | |
| }, | |
| { | |
| "completion_length": 254.1041717529297, | |
| "epoch": 0.08862222222222223, | |
| "grad_norm": 112.0015869140625, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6874, | |
| "reward": 1.6666666865348816, | |
| "reward_std": 0.3557328134775162, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7083333432674408, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 997, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.08871111111111112, | |
| "grad_norm": 117.1604232788086, | |
| "learning_rate": 3e-06, | |
| "loss": 10.6509, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.0888, | |
| "grad_norm": 97.7497787475586, | |
| "learning_rate": 3e-06, | |
| "loss": 1.431, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 99.68553161621094, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1808, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08897777777777778, | |
| "grad_norm": 127.76787567138672, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5762, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.08906666666666667, | |
| "grad_norm": 93.00830841064453, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1825, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.08915555555555556, | |
| "grad_norm": 100.87223815917969, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2862, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.08924444444444445, | |
| "grad_norm": 125.37133026123047, | |
| "learning_rate": 3e-06, | |
| "loss": 9.2038, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.08933333333333333, | |
| "grad_norm": 107.84559631347656, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0948, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.08942222222222222, | |
| "grad_norm": 126.59029388427734, | |
| "learning_rate": 3e-06, | |
| "loss": 7.2332, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.08951111111111111, | |
| "grad_norm": 120.74652099609375, | |
| "learning_rate": 3e-06, | |
| "loss": 6.3094, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.0896, | |
| "grad_norm": 94.37996673583984, | |
| "learning_rate": 3e-06, | |
| "loss": 3.469, | |
| "step": 1008 | |
| }, | |
| { | |
| "completion_length": 242.25000762939453, | |
| "epoch": 0.08968888888888889, | |
| "grad_norm": 61.877044677734375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3003, | |
| "reward": 1.0104166865348816, | |
| "reward_std": 0.11467799544334412, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.3333333358168602, | |
| "step": 1009, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.08977777777777778, | |
| "grad_norm": 48.69044494628906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.93, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.08986666666666666, | |
| "grad_norm": 62.56666946411133, | |
| "learning_rate": 3e-06, | |
| "loss": 0.7216, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.08995555555555555, | |
| "grad_norm": 54.1674690246582, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7886, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.09004444444444444, | |
| "grad_norm": 52.60224533081055, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5879, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.09013333333333333, | |
| "grad_norm": 45.58321762084961, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5204, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.09022222222222222, | |
| "grad_norm": 57.8793830871582, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4941, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.0903111111111111, | |
| "grad_norm": 51.80791091918945, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7426, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.0904, | |
| "grad_norm": 56.86159896850586, | |
| "learning_rate": 3e-06, | |
| "loss": -0.3923, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.09048888888888888, | |
| "grad_norm": 48.4435920715332, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0717, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.09057777777777777, | |
| "grad_norm": 52.369598388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6683, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.09066666666666667, | |
| "grad_norm": 45.13036346435547, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8826, | |
| "step": 1020 | |
| }, | |
| { | |
| "completion_length": 251.9166717529297, | |
| "epoch": 0.09075555555555556, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "reward": 1.5, | |
| "reward_std": 0.0, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1021, | |
| "zero_std_ratio": 1.0 | |
| }, | |
| { | |
| "epoch": 0.09084444444444445, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.09093333333333334, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.09102222222222223, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.09111111111111111, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.0912, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.09128888888888889, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.09137777777777778, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.09146666666666667, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.09155555555555556, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.09164444444444445, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.09173333333333333, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "step": 1032 | |
| }, | |
| { | |
| "completion_length": 252.3541717529297, | |
| "epoch": 0.09182222222222222, | |
| "grad_norm": 126.83654022216797, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3335, | |
| "reward": 1.625, | |
| "reward_std": 0.3410547971725464, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.875, | |
| "step": 1033, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.09191111111111111, | |
| "grad_norm": 102.25955963134766, | |
| "learning_rate": 3e-06, | |
| "loss": -6.4906, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 128.81582641601562, | |
| "learning_rate": 3e-06, | |
| "loss": -6.558, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.09208888888888889, | |
| "grad_norm": 112.67058563232422, | |
| "learning_rate": 3e-06, | |
| "loss": -9.3061, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.09217777777777778, | |
| "grad_norm": 108.50650024414062, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0381, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.09226666666666666, | |
| "grad_norm": 108.7009506225586, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0853, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.09235555555555555, | |
| "grad_norm": 114.24646759033203, | |
| "learning_rate": 3e-06, | |
| "loss": -9.0476, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.09244444444444444, | |
| "grad_norm": 99.69547271728516, | |
| "learning_rate": 3e-06, | |
| "loss": -8.1766, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.09253333333333333, | |
| "grad_norm": 142.9137725830078, | |
| "learning_rate": 3e-06, | |
| "loss": -7.6658, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.09262222222222222, | |
| "grad_norm": 113.05297088623047, | |
| "learning_rate": 3e-06, | |
| "loss": -10.693, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.0927111111111111, | |
| "grad_norm": 114.48544311523438, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4665, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.0928, | |
| "grad_norm": 131.56544494628906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.0749, | |
| "step": 1044 | |
| }, | |
| { | |
| "completion_length": 251.3125, | |
| "epoch": 0.09288888888888888, | |
| "grad_norm": 162.5680389404297, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2852, | |
| "reward": 1.4479166865348816, | |
| "reward_std": 0.5932036638259888, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.708333358168602, | |
| "step": 1045, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.09297777777777778, | |
| "grad_norm": 183.73635864257812, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3862, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.09306666666666667, | |
| "grad_norm": 149.2581329345703, | |
| "learning_rate": 3e-06, | |
| "loss": 3.2506, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.09315555555555556, | |
| "grad_norm": 146.53892517089844, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7977, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.09324444444444445, | |
| "grad_norm": 187.605224609375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9329, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.09333333333333334, | |
| "grad_norm": 135.69638061523438, | |
| "learning_rate": 3e-06, | |
| "loss": 1.364, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.09342222222222223, | |
| "grad_norm": 155.4630126953125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1606, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.09351111111111111, | |
| "grad_norm": 137.24606323242188, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4667, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.0936, | |
| "grad_norm": 145.3031768798828, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3328, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.09368888888888889, | |
| "grad_norm": 138.85008239746094, | |
| "learning_rate": 3e-06, | |
| "loss": 3.6961, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.09377777777777778, | |
| "grad_norm": 151.01002502441406, | |
| "learning_rate": 3e-06, | |
| "loss": -8.223, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.09386666666666667, | |
| "grad_norm": 130.21697998046875, | |
| "learning_rate": 3e-06, | |
| "loss": -1.2589, | |
| "step": 1056 | |
| }, | |
| { | |
| "completion_length": 255.4791717529297, | |
| "epoch": 0.09395555555555556, | |
| "grad_norm": 129.92930603027344, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7404, | |
| "reward": 1.0, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1057, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.09404444444444444, | |
| "grad_norm": 134.7594451904297, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4851, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.09413333333333333, | |
| "grad_norm": 145.62059020996094, | |
| "learning_rate": 3e-06, | |
| "loss": -11.0884, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.09422222222222222, | |
| "grad_norm": 116.93612670898438, | |
| "learning_rate": 3e-06, | |
| "loss": 6.683, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.09431111111111111, | |
| "grad_norm": 126.82006072998047, | |
| "learning_rate": 3e-06, | |
| "loss": -7.7905, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.0944, | |
| "grad_norm": 252.49966430664062, | |
| "learning_rate": 3e-06, | |
| "loss": 4.6391, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.09448888888888889, | |
| "grad_norm": 129.0404052734375, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2658, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.09457777777777777, | |
| "grad_norm": 140.08370971679688, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4286, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.09466666666666666, | |
| "grad_norm": 114.8161392211914, | |
| "learning_rate": 3e-06, | |
| "loss": -12.9169, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.09475555555555555, | |
| "grad_norm": 112.29281616210938, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7452, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.09484444444444444, | |
| "grad_norm": 140.55029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.5303, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.09493333333333333, | |
| "grad_norm": 147.0571746826172, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0677, | |
| "step": 1068 | |
| }, | |
| { | |
| "completion_length": 245.0416717529297, | |
| "epoch": 0.09502222222222222, | |
| "grad_norm": 101.13983154296875, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3268, | |
| "reward": 0.7916666865348816, | |
| "reward_std": 0.27350127696990967, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.4999999850988388, | |
| "rewards/correctness_reward_func_math": 0.2916666679084301, | |
| "step": 1069, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.0951111111111111, | |
| "grad_norm": 121.06708526611328, | |
| "learning_rate": 3e-06, | |
| "loss": 4.489, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.0952, | |
| "grad_norm": 127.9291763305664, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7732, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.0952888888888889, | |
| "grad_norm": 120.67790222167969, | |
| "learning_rate": 3e-06, | |
| "loss": 7.2385, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.09537777777777778, | |
| "grad_norm": 98.45962524414062, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8857, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.09546666666666667, | |
| "grad_norm": 110.4314193725586, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6899, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.09555555555555556, | |
| "grad_norm": 101.835693359375, | |
| "learning_rate": 3e-06, | |
| "loss": 5.3228, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.09564444444444445, | |
| "grad_norm": 119.22904205322266, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0508, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.09573333333333334, | |
| "grad_norm": 126.35284423828125, | |
| "learning_rate": 3e-06, | |
| "loss": -1.1565, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.09582222222222223, | |
| "grad_norm": 129.94705200195312, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3463, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.09591111111111111, | |
| "grad_norm": 95.31863403320312, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0709, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 111.38770294189453, | |
| "learning_rate": 3e-06, | |
| "loss": -4.804, | |
| "step": 1080 | |
| }, | |
| { | |
| "completion_length": 227.33333587646484, | |
| "epoch": 0.09608888888888889, | |
| "grad_norm": 93.04568481445312, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8293, | |
| "reward": 1.2604166865348816, | |
| "reward_std": 0.4915197938680649, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.5833333134651184, | |
| "step": 1081, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.09617777777777778, | |
| "grad_norm": 186.89431762695312, | |
| "learning_rate": 3e-06, | |
| "loss": -1.6852, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.09626666666666667, | |
| "grad_norm": 132.6072998046875, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4138, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.09635555555555556, | |
| "grad_norm": 128.269287109375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5699, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.09644444444444444, | |
| "grad_norm": 104.99845123291016, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2839, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.09653333333333333, | |
| "grad_norm": 127.01644897460938, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2089, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.09662222222222222, | |
| "grad_norm": 99.6088638305664, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0049, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.09671111111111111, | |
| "grad_norm": 120.30072784423828, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4027, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.0968, | |
| "grad_norm": 121.11226654052734, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6903, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.09688888888888889, | |
| "grad_norm": 101.49308013916016, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6379, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.09697777777777777, | |
| "grad_norm": 117.81327819824219, | |
| "learning_rate": 3e-06, | |
| "loss": -15.1799, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.09706666666666666, | |
| "grad_norm": 114.92808532714844, | |
| "learning_rate": 3e-06, | |
| "loss": -8.7164, | |
| "step": 1092 | |
| }, | |
| { | |
| "completion_length": 245.68750762939453, | |
| "epoch": 0.09715555555555555, | |
| "grad_norm": 137.37049865722656, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5632, | |
| "reward": 1.2604166865348816, | |
| "reward_std": 0.3936076760292053, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.5833333134651184, | |
| "step": 1093, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.09724444444444444, | |
| "grad_norm": 104.14005279541016, | |
| "learning_rate": 3e-06, | |
| "loss": -13.8548, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.09733333333333333, | |
| "grad_norm": 111.48066711425781, | |
| "learning_rate": 3e-06, | |
| "loss": -14.4865, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.09742222222222222, | |
| "grad_norm": 115.86460876464844, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7074, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.09751111111111112, | |
| "grad_norm": 108.50313568115234, | |
| "learning_rate": 3e-06, | |
| "loss": -17.9489, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.0976, | |
| "grad_norm": 113.4880599975586, | |
| "learning_rate": 3e-06, | |
| "loss": -16.1087, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.0976888888888889, | |
| "grad_norm": 109.29180145263672, | |
| "learning_rate": 3e-06, | |
| "loss": -19.2428, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.09777777777777778, | |
| "grad_norm": 107.1020278930664, | |
| "learning_rate": 3e-06, | |
| "loss": -15.2564, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.09786666666666667, | |
| "grad_norm": 131.5577392578125, | |
| "learning_rate": 3e-06, | |
| "loss": -15.6807, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.09795555555555556, | |
| "grad_norm": 121.74998474121094, | |
| "learning_rate": 3e-06, | |
| "loss": -15.0192, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.09804444444444445, | |
| "grad_norm": 107.87700653076172, | |
| "learning_rate": 3e-06, | |
| "loss": -19.6246, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.09813333333333334, | |
| "grad_norm": 132.6728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -17.4119, | |
| "step": 1104 | |
| }, | |
| { | |
| "completion_length": 247.375, | |
| "epoch": 0.09822222222222222, | |
| "grad_norm": 194.66749572753906, | |
| "learning_rate": 3e-06, | |
| "loss": -59.2499, | |
| "reward": 1.7083333730697632, | |
| "reward_std": 0.6184598803520203, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 1105, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.09831111111111111, | |
| "grad_norm": 179.941162109375, | |
| "learning_rate": 3e-06, | |
| "loss": -46.7423, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.0984, | |
| "grad_norm": 232.29762268066406, | |
| "learning_rate": 3e-06, | |
| "loss": -54.0588, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.09848888888888889, | |
| "grad_norm": 208.61793518066406, | |
| "learning_rate": 3e-06, | |
| "loss": -72.7937, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.09857777777777778, | |
| "grad_norm": 180.18431091308594, | |
| "learning_rate": 3e-06, | |
| "loss": -37.6563, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.09866666666666667, | |
| "grad_norm": 191.83653259277344, | |
| "learning_rate": 3e-06, | |
| "loss": -56.5117, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.09875555555555555, | |
| "grad_norm": 192.1591796875, | |
| "learning_rate": 3e-06, | |
| "loss": -63.0639, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.09884444444444444, | |
| "grad_norm": 183.53610229492188, | |
| "learning_rate": 3e-06, | |
| "loss": -50.2258, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.09893333333333333, | |
| "grad_norm": 233.22872924804688, | |
| "learning_rate": 3e-06, | |
| "loss": -58.1688, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.09902222222222222, | |
| "grad_norm": 219.78233337402344, | |
| "learning_rate": 3e-06, | |
| "loss": -78.4412, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.09911111111111111, | |
| "grad_norm": 189.9258270263672, | |
| "learning_rate": 3e-06, | |
| "loss": -42.5197, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.0992, | |
| "grad_norm": 204.02183532714844, | |
| "learning_rate": 3e-06, | |
| "loss": -61.9828, | |
| "step": 1116 | |
| }, | |
| { | |
| "completion_length": 248.9166717529297, | |
| "epoch": 0.09928888888888888, | |
| "grad_norm": 114.85681915283203, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5984, | |
| "reward": 1.2395833730697632, | |
| "reward_std": 0.2296396717429161, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.4999999850988388, | |
| "step": 1117, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.09937777777777777, | |
| "grad_norm": 113.66478729248047, | |
| "learning_rate": 3e-06, | |
| "loss": -2.2296, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.09946666666666666, | |
| "grad_norm": 124.4161148071289, | |
| "learning_rate": 3e-06, | |
| "loss": 4.382, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.09955555555555555, | |
| "grad_norm": 131.39085388183594, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5734, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.09964444444444444, | |
| "grad_norm": 119.46894073486328, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0378, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.09973333333333333, | |
| "grad_norm": 113.26507568359375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4215, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.09982222222222223, | |
| "grad_norm": 120.69562530517578, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0203, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.09991111111111112, | |
| "grad_norm": 128.0107421875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7284, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 139.31997680664062, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9527, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.1000888888888889, | |
| "grad_norm": 111.49156188964844, | |
| "learning_rate": 3e-06, | |
| "loss": -1.217, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.10017777777777778, | |
| "grad_norm": 107.92985534667969, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7084, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.10026666666666667, | |
| "grad_norm": 112.88748168945312, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5577, | |
| "step": 1128 | |
| }, | |
| { | |
| "completion_length": 245.18750762939453, | |
| "epoch": 0.10035555555555556, | |
| "grad_norm": 124.6043930053711, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6745, | |
| "reward": 1.4583333432674408, | |
| "reward_std": 0.5722163170576096, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.8333333283662796, | |
| "step": 1129, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.10044444444444445, | |
| "grad_norm": 118.25672149658203, | |
| "learning_rate": 3e-06, | |
| "loss": -32.0798, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.10053333333333334, | |
| "grad_norm": 189.2974853515625, | |
| "learning_rate": 3e-06, | |
| "loss": -23.7609, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.10062222222222222, | |
| "grad_norm": 139.94485473632812, | |
| "learning_rate": 3e-06, | |
| "loss": -30.0811, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.10071111111111111, | |
| "grad_norm": 132.92324829101562, | |
| "learning_rate": 3e-06, | |
| "loss": -29.1304, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.1008, | |
| "grad_norm": 129.6322021484375, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8999, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.10088888888888889, | |
| "grad_norm": 122.53899383544922, | |
| "learning_rate": 3e-06, | |
| "loss": -22.266, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.10097777777777778, | |
| "grad_norm": 111.6375961303711, | |
| "learning_rate": 3e-06, | |
| "loss": -32.9394, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.10106666666666667, | |
| "grad_norm": 162.63771057128906, | |
| "learning_rate": 3e-06, | |
| "loss": -25.9897, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.10115555555555555, | |
| "grad_norm": 154.28424072265625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.7633, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.10124444444444444, | |
| "grad_norm": 132.94351196289062, | |
| "learning_rate": 3e-06, | |
| "loss": -31.2033, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.10133333333333333, | |
| "grad_norm": 131.9947052001953, | |
| "learning_rate": 3e-06, | |
| "loss": -27.4069, | |
| "step": 1140 | |
| }, | |
| { | |
| "completion_length": 248.625, | |
| "epoch": 0.10142222222222222, | |
| "grad_norm": 113.37858581542969, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7283, | |
| "reward": 0.9479166865348816, | |
| "reward_std": 0.2296396642923355, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6145833432674408, | |
| "rewards/correctness_reward_func_math": 0.3333333246409893, | |
| "step": 1141, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.10151111111111111, | |
| "grad_norm": 112.18434143066406, | |
| "learning_rate": 3e-06, | |
| "loss": -11.645, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.1016, | |
| "grad_norm": 105.41796112060547, | |
| "learning_rate": 3e-06, | |
| "loss": -15.9935, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.10168888888888888, | |
| "grad_norm": 91.3786849975586, | |
| "learning_rate": 3e-06, | |
| "loss": -16.6043, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.10177777777777777, | |
| "grad_norm": 104.3455810546875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4885, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.10186666666666666, | |
| "grad_norm": 87.41930389404297, | |
| "learning_rate": 3e-06, | |
| "loss": -20.634, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.10195555555555555, | |
| "grad_norm": 90.50940704345703, | |
| "learning_rate": 3e-06, | |
| "loss": -16.7866, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.10204444444444444, | |
| "grad_norm": 117.73979949951172, | |
| "learning_rate": 3e-06, | |
| "loss": -13.9565, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.10213333333333334, | |
| "grad_norm": 117.4783706665039, | |
| "learning_rate": 3e-06, | |
| "loss": -18.3371, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.10222222222222223, | |
| "grad_norm": 90.1675033569336, | |
| "learning_rate": 3e-06, | |
| "loss": -19.5285, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.10231111111111112, | |
| "grad_norm": 114.65315246582031, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9923, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.1024, | |
| "grad_norm": 236.66262817382812, | |
| "learning_rate": 3e-06, | |
| "loss": -23.6847, | |
| "step": 1152 | |
| }, | |
| { | |
| "completion_length": 254.0625, | |
| "epoch": 0.1024888888888889, | |
| "grad_norm": 112.95475769042969, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7434, | |
| "reward": 0.96875, | |
| "reward_std": 0.3734789788722992, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.71875, | |
| "rewards/correctness_reward_func_math": 0.25, | |
| "step": 1153, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.10257777777777778, | |
| "grad_norm": 144.55364990234375, | |
| "learning_rate": 3e-06, | |
| "loss": -22.4297, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.10266666666666667, | |
| "grad_norm": 128.1786346435547, | |
| "learning_rate": 3e-06, | |
| "loss": -34.1587, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.10275555555555556, | |
| "grad_norm": 125.85458374023438, | |
| "learning_rate": 3e-06, | |
| "loss": -30.6015, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.10284444444444445, | |
| "grad_norm": 142.524658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -31.4122, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.10293333333333334, | |
| "grad_norm": 111.42877197265625, | |
| "learning_rate": 3e-06, | |
| "loss": -26.1967, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.10302222222222222, | |
| "grad_norm": 123.39324951171875, | |
| "learning_rate": 3e-06, | |
| "loss": -29.3019, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.10311111111111111, | |
| "grad_norm": 151.65802001953125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.3001, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.1032, | |
| "grad_norm": 127.43438720703125, | |
| "learning_rate": 3e-06, | |
| "loss": -36.1734, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.10328888888888889, | |
| "grad_norm": 123.67347717285156, | |
| "learning_rate": 3e-06, | |
| "loss": -33.6541, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.10337777777777778, | |
| "grad_norm": 147.1012420654297, | |
| "learning_rate": 3e-06, | |
| "loss": -33.954, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.10346666666666667, | |
| "grad_norm": 125.47201538085938, | |
| "learning_rate": 3e-06, | |
| "loss": -28.7879, | |
| "step": 1164 | |
| }, | |
| { | |
| "completion_length": 236.64583587646484, | |
| "epoch": 0.10355555555555555, | |
| "grad_norm": 348.24346923828125, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8131, | |
| "reward": 1.4375000596046448, | |
| "reward_std": 0.4971916079521179, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1165, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.10364444444444444, | |
| "grad_norm": 189.84584045410156, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3377, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.10373333333333333, | |
| "grad_norm": 192.2544403076172, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7722, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.10382222222222222, | |
| "grad_norm": 180.6472625732422, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7487, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.1039111111111111, | |
| "grad_norm": 197.0133819580078, | |
| "learning_rate": 3e-06, | |
| "loss": -2.4648, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 256.5722351074219, | |
| "learning_rate": 3e-06, | |
| "loss": -17.2997, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.10408888888888888, | |
| "grad_norm": 179.86228942871094, | |
| "learning_rate": 3e-06, | |
| "loss": -12.5209, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.10417777777777777, | |
| "grad_norm": 195.3128204345703, | |
| "learning_rate": 3e-06, | |
| "loss": -9.6999, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.10426666666666666, | |
| "grad_norm": 204.9373321533203, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9052, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.10435555555555555, | |
| "grad_norm": 192.8905792236328, | |
| "learning_rate": 3e-06, | |
| "loss": -10.4987, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.10444444444444445, | |
| "grad_norm": 181.99449157714844, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0688, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.10453333333333334, | |
| "grad_norm": 189.1867218017578, | |
| "learning_rate": 3e-06, | |
| "loss": -22.4777, | |
| "step": 1176 | |
| }, | |
| { | |
| "completion_length": 250.33333587646484, | |
| "epoch": 0.10462222222222223, | |
| "grad_norm": 158.67459106445312, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4502, | |
| "reward": 0.9583333432674408, | |
| "reward_std": 0.5451789498329163, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5, | |
| "rewards/correctness_reward_func_math": 0.4583333358168602, | |
| "step": 1177, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.10471111111111112, | |
| "grad_norm": 158.85533142089844, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1446, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.1048, | |
| "grad_norm": 155.9822235107422, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7899, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.10488888888888889, | |
| "grad_norm": 150.00985717773438, | |
| "learning_rate": 3e-06, | |
| "loss": 5.268, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.10497777777777778, | |
| "grad_norm": 140.22618103027344, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0269, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.10506666666666667, | |
| "grad_norm": 130.9547119140625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5307, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.10515555555555556, | |
| "grad_norm": 157.96466064453125, | |
| "learning_rate": 3e-06, | |
| "loss": -10.1506, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.10524444444444445, | |
| "grad_norm": 162.57582092285156, | |
| "learning_rate": 3e-06, | |
| "loss": -14.3085, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.10533333333333333, | |
| "grad_norm": 163.5466766357422, | |
| "learning_rate": 3e-06, | |
| "loss": -9.5339, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.10542222222222222, | |
| "grad_norm": 126.0348129272461, | |
| "learning_rate": 3e-06, | |
| "loss": 2.7285, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.10551111111111111, | |
| "grad_norm": 152.0486297607422, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0055, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.1056, | |
| "grad_norm": 135.672607421875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9305, | |
| "step": 1188 | |
| }, | |
| { | |
| "completion_length": 242.43750762939453, | |
| "epoch": 0.10568888888888889, | |
| "grad_norm": 102.83517456054688, | |
| "learning_rate": 3e-06, | |
| "loss": -9.6079, | |
| "reward": 1.5833333730697632, | |
| "reward_std": 0.3332235962152481, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.8333333134651184, | |
| "step": 1189, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.10577777777777778, | |
| "grad_norm": 104.57038116455078, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4859, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.10586666666666666, | |
| "grad_norm": 130.5141143798828, | |
| "learning_rate": 3e-06, | |
| "loss": 9.6096, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.10595555555555555, | |
| "grad_norm": 121.0637435913086, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0112, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.10604444444444444, | |
| "grad_norm": 116.66060638427734, | |
| "learning_rate": 3e-06, | |
| "loss": -0.6318, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.10613333333333333, | |
| "grad_norm": 93.47602844238281, | |
| "learning_rate": 3e-06, | |
| "loss": -7.121, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.10622222222222222, | |
| "grad_norm": 97.91071319580078, | |
| "learning_rate": 3e-06, | |
| "loss": -11.2048, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.1063111111111111, | |
| "grad_norm": 99.79684448242188, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4369, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.1064, | |
| "grad_norm": 103.4743423461914, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3113, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.10648888888888888, | |
| "grad_norm": 115.71762084960938, | |
| "learning_rate": 3e-06, | |
| "loss": -8.8378, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.10657777777777777, | |
| "grad_norm": 116.43769073486328, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6688, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 93.20524597167969, | |
| "learning_rate": 3e-06, | |
| "loss": -9.0718, | |
| "step": 1200 | |
| }, | |
| { | |
| "completion_length": 242.77083587646484, | |
| "epoch": 0.10675555555555556, | |
| "grad_norm": 156.0432586669922, | |
| "learning_rate": 3e-06, | |
| "loss": 6.0861, | |
| "reward": 1.6770833730697632, | |
| "reward_std": 0.5419133305549622, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 1.0, | |
| "step": 1201, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.10684444444444445, | |
| "grad_norm": 173.78958129882812, | |
| "learning_rate": 3e-06, | |
| "loss": 9.0247, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.10693333333333334, | |
| "grad_norm": 152.82534790039062, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6834, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.10702222222222223, | |
| "grad_norm": 151.876953125, | |
| "learning_rate": 3e-06, | |
| "loss": 6.9018, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.10711111111111112, | |
| "grad_norm": 165.88400268554688, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1234, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.1072, | |
| "grad_norm": 167.48348999023438, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4483, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.10728888888888889, | |
| "grad_norm": 149.90536499023438, | |
| "learning_rate": 3e-06, | |
| "loss": 2.7725, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.10737777777777778, | |
| "grad_norm": 169.23411560058594, | |
| "learning_rate": 3e-06, | |
| "loss": 6.9296, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.10746666666666667, | |
| "grad_norm": 233.46914672851562, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5024, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.10755555555555556, | |
| "grad_norm": 147.3250732421875, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7723, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.10764444444444445, | |
| "grad_norm": 155.88914489746094, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0329, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.10773333333333333, | |
| "grad_norm": 153.82785034179688, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0486, | |
| "step": 1212 | |
| }, | |
| { | |
| "completion_length": 247.6666717529297, | |
| "epoch": 0.10782222222222222, | |
| "grad_norm": 158.41856384277344, | |
| "learning_rate": 3e-06, | |
| "loss": 18.2887, | |
| "reward": 1.614583432674408, | |
| "reward_std": 0.3665703386068344, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.8750000149011612, | |
| "step": 1213, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.10791111111111111, | |
| "grad_norm": 163.51866149902344, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6461, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 131.4403839111328, | |
| "learning_rate": 3e-06, | |
| "loss": 3.3667, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.10808888888888889, | |
| "grad_norm": 136.77757263183594, | |
| "learning_rate": 3e-06, | |
| "loss": 4.406, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.10817777777777778, | |
| "grad_norm": 113.4407958984375, | |
| "learning_rate": 3e-06, | |
| "loss": 12.3995, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.10826666666666666, | |
| "grad_norm": 136.1168212890625, | |
| "learning_rate": 3e-06, | |
| "loss": 4.6776, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.10835555555555555, | |
| "grad_norm": 144.9372100830078, | |
| "learning_rate": 3e-06, | |
| "loss": 17.5724, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.10844444444444444, | |
| "grad_norm": 125.29820251464844, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1424, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.10853333333333333, | |
| "grad_norm": 138.5077667236328, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3947, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.10862222222222222, | |
| "grad_norm": 128.62693786621094, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4864, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.1087111111111111, | |
| "grad_norm": 107.87577056884766, | |
| "learning_rate": 3e-06, | |
| "loss": 10.4239, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.1088, | |
| "grad_norm": 127.07205963134766, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3109, | |
| "step": 1224 | |
| }, | |
| { | |
| "completion_length": 238.89584350585938, | |
| "epoch": 0.10888888888888888, | |
| "grad_norm": 154.35325622558594, | |
| "learning_rate": 3e-06, | |
| "loss": 8.6596, | |
| "reward": 1.5208333730697632, | |
| "reward_std": 0.3680921420454979, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8333333432674408, | |
| "step": 1225, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.10897777777777778, | |
| "grad_norm": 165.58445739746094, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8278, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.10906666666666667, | |
| "grad_norm": 360.30029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1779, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.10915555555555556, | |
| "grad_norm": 152.518310546875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.97, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.10924444444444445, | |
| "grad_norm": 146.67100524902344, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3997, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.10933333333333334, | |
| "grad_norm": 138.9771728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0985, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.10942222222222223, | |
| "grad_norm": 159.66302490234375, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5207, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.10951111111111111, | |
| "grad_norm": 138.22695922851562, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4886, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.1096, | |
| "grad_norm": 160.3970489501953, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1071, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.10968888888888889, | |
| "grad_norm": 141.63226318359375, | |
| "learning_rate": 3e-06, | |
| "loss": -13.6489, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.10977777777777778, | |
| "grad_norm": 146.45748901367188, | |
| "learning_rate": 3e-06, | |
| "loss": -10.0014, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.10986666666666667, | |
| "grad_norm": 140.94992065429688, | |
| "learning_rate": 3e-06, | |
| "loss": -9.6508, | |
| "step": 1236 | |
| }, | |
| { | |
| "completion_length": 255.4166717529297, | |
| "epoch": 0.10995555555555556, | |
| "grad_norm": 160.60601806640625, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8826, | |
| "reward": 0.8020833432674408, | |
| "reward_std": 0.3381742835044861, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6354166865348816, | |
| "rewards/correctness_reward_func_math": 0.1666666679084301, | |
| "step": 1237, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.11004444444444444, | |
| "grad_norm": 186.42630004882812, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6822, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.11013333333333333, | |
| "grad_norm": 141.29820251464844, | |
| "learning_rate": 3e-06, | |
| "loss": 11.1707, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.11022222222222222, | |
| "grad_norm": 144.1931610107422, | |
| "learning_rate": 3e-06, | |
| "loss": 3.3338, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.11031111111111111, | |
| "grad_norm": 135.27630615234375, | |
| "learning_rate": 3e-06, | |
| "loss": 4.9367, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.1104, | |
| "grad_norm": 175.80433654785156, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1628, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.11048888888888889, | |
| "grad_norm": 165.0531463623047, | |
| "learning_rate": 3e-06, | |
| "loss": -0.3721, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.11057777777777777, | |
| "grad_norm": 264.06695556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2717, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.11066666666666666, | |
| "grad_norm": 145.43423461914062, | |
| "learning_rate": 3e-06, | |
| "loss": 8.5606, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.11075555555555555, | |
| "grad_norm": 145.75946044921875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9458, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.11084444444444444, | |
| "grad_norm": 125.21862030029297, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1516, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.11093333333333333, | |
| "grad_norm": 177.2153778076172, | |
| "learning_rate": 3e-06, | |
| "loss": -17.2454, | |
| "step": 1248 | |
| }, | |
| { | |
| "completion_length": 251.6041717529297, | |
| "epoch": 0.11102222222222222, | |
| "grad_norm": 371.7901306152344, | |
| "learning_rate": 3e-06, | |
| "loss": -30.6046, | |
| "reward": 1.9895833730697632, | |
| "reward_std": 0.8478911817073822, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 1.25, | |
| "step": 1249, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 241.84666442871094, | |
| "learning_rate": 3e-06, | |
| "loss": -47.9038, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.1112, | |
| "grad_norm": 187.25973510742188, | |
| "learning_rate": 3e-06, | |
| "loss": -31.5847, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.1112888888888889, | |
| "grad_norm": 220.72537231445312, | |
| "learning_rate": 3e-06, | |
| "loss": -37.9464, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.11137777777777778, | |
| "grad_norm": 215.938720703125, | |
| "learning_rate": 3e-06, | |
| "loss": -36.917, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.11146666666666667, | |
| "grad_norm": 255.27755737304688, | |
| "learning_rate": 3e-06, | |
| "loss": -36.4102, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.11155555555555556, | |
| "grad_norm": 252.8043212890625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.6218, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.11164444444444445, | |
| "grad_norm": 225.40321350097656, | |
| "learning_rate": 3e-06, | |
| "loss": -51.6723, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.11173333333333334, | |
| "grad_norm": 208.3738250732422, | |
| "learning_rate": 3e-06, | |
| "loss": -35.6313, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.11182222222222223, | |
| "grad_norm": 215.90704345703125, | |
| "learning_rate": 3e-06, | |
| "loss": -41.5126, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.11191111111111111, | |
| "grad_norm": 242.8232879638672, | |
| "learning_rate": 3e-06, | |
| "loss": -42.0952, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 220.46678161621094, | |
| "learning_rate": 3e-06, | |
| "loss": -40.7929, | |
| "step": 1260 | |
| }, | |
| { | |
| "completion_length": 252.2291717529297, | |
| "epoch": 0.11208888888888889, | |
| "grad_norm": 112.288330078125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.9692, | |
| "reward": 0.8333333730697632, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.2083333283662796, | |
| "step": 1261, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.11217777777777778, | |
| "grad_norm": 111.31488800048828, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9427, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.11226666666666667, | |
| "grad_norm": 130.2353057861328, | |
| "learning_rate": 3e-06, | |
| "loss": -30.2762, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.11235555555555556, | |
| "grad_norm": 154.38973999023438, | |
| "learning_rate": 3e-06, | |
| "loss": -39.3815, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.11244444444444444, | |
| "grad_norm": 109.89620971679688, | |
| "learning_rate": 3e-06, | |
| "loss": -28.5723, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.11253333333333333, | |
| "grad_norm": 104.00005340576172, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6052, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.11262222222222222, | |
| "grad_norm": 121.78623962402344, | |
| "learning_rate": 3e-06, | |
| "loss": -30.0983, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.11271111111111111, | |
| "grad_norm": 119.55603790283203, | |
| "learning_rate": 3e-06, | |
| "loss": -23.8716, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.1128, | |
| "grad_norm": 124.7007827758789, | |
| "learning_rate": 3e-06, | |
| "loss": -31.7822, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.11288888888888889, | |
| "grad_norm": 133.42088317871094, | |
| "learning_rate": 3e-06, | |
| "loss": -42.2768, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.11297777777777777, | |
| "grad_norm": 128.7488555908203, | |
| "learning_rate": 3e-06, | |
| "loss": -30.9053, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.11306666666666666, | |
| "grad_norm": 128.32147216796875, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7986, | |
| "step": 1272 | |
| }, | |
| { | |
| "completion_length": 251.02083587646484, | |
| "epoch": 0.11315555555555555, | |
| "grad_norm": 276.54681396484375, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3431, | |
| "reward": 1.7916666865348816, | |
| "reward_std": 0.4701542258262634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 1273, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.11324444444444444, | |
| "grad_norm": 234.2393341064453, | |
| "learning_rate": 3e-06, | |
| "loss": -13.7623, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.11333333333333333, | |
| "grad_norm": 236.026611328125, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7751, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.11342222222222222, | |
| "grad_norm": 245.18170166015625, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0847, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.1135111111111111, | |
| "grad_norm": 314.53057861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6135, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.1136, | |
| "grad_norm": 207.09188842773438, | |
| "learning_rate": 3e-06, | |
| "loss": -13.115, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.1136888888888889, | |
| "grad_norm": 252.8507080078125, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3574, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.11377777777777778, | |
| "grad_norm": 241.2433319091797, | |
| "learning_rate": 3e-06, | |
| "loss": -18.4114, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.11386666666666667, | |
| "grad_norm": 217.98683166503906, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0416, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.11395555555555556, | |
| "grad_norm": 252.15773010253906, | |
| "learning_rate": 3e-06, | |
| "loss": -19.5222, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.11404444444444445, | |
| "grad_norm": 227.17205810546875, | |
| "learning_rate": 3e-06, | |
| "loss": -13.2521, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.11413333333333334, | |
| "grad_norm": 235.24327087402344, | |
| "learning_rate": 3e-06, | |
| "loss": -17.9427, | |
| "step": 1284 | |
| }, | |
| { | |
| "completion_length": 253.25, | |
| "epoch": 0.11422222222222222, | |
| "grad_norm": 206.74746704101562, | |
| "learning_rate": 3e-06, | |
| "loss": 0.218, | |
| "reward": 1.7187500596046448, | |
| "reward_std": 0.5723656415939331, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 1285, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.11431111111111111, | |
| "grad_norm": 207.45079040527344, | |
| "learning_rate": 3e-06, | |
| "loss": 23.7089, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.1144, | |
| "grad_norm": 201.48416137695312, | |
| "learning_rate": 3e-06, | |
| "loss": 16.6222, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.11448888888888889, | |
| "grad_norm": 240.6068115234375, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8229, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.11457777777777778, | |
| "grad_norm": 267.2186279296875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3361, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.11466666666666667, | |
| "grad_norm": 254.57681274414062, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6077, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.11475555555555556, | |
| "grad_norm": 215.68649291992188, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7734, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.11484444444444444, | |
| "grad_norm": 227.70590209960938, | |
| "learning_rate": 3e-06, | |
| "loss": 18.5952, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.11493333333333333, | |
| "grad_norm": 215.9313201904297, | |
| "learning_rate": 3e-06, | |
| "loss": 13.7655, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.11502222222222222, | |
| "grad_norm": 251.1554718017578, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3305, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.11511111111111111, | |
| "grad_norm": 274.58538818359375, | |
| "learning_rate": 3e-06, | |
| "loss": -8.8694, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.1152, | |
| "grad_norm": 238.6010284423828, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8695, | |
| "step": 1296 | |
| }, | |
| { | |
| "completion_length": 252.1666717529297, | |
| "epoch": 0.11528888888888889, | |
| "grad_norm": 220.25823974609375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0592, | |
| "reward": 1.0625, | |
| "reward_std": 0.39512956142425537, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1297, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.11537777777777777, | |
| "grad_norm": 190.90309143066406, | |
| "learning_rate": 3e-06, | |
| "loss": -21.8086, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.11546666666666666, | |
| "grad_norm": 167.63180541992188, | |
| "learning_rate": 3e-06, | |
| "loss": -10.4386, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.11555555555555555, | |
| "grad_norm": 196.72048950195312, | |
| "learning_rate": 3e-06, | |
| "loss": -10.7261, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.11564444444444444, | |
| "grad_norm": 243.5116424560547, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7371, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.11573333333333333, | |
| "grad_norm": 163.6327667236328, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7888, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.11582222222222222, | |
| "grad_norm": 217.04978942871094, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9678, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.11591111111111112, | |
| "grad_norm": 182.5911865234375, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9758, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 167.71888732910156, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1331, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.1160888888888889, | |
| "grad_norm": 168.7008819580078, | |
| "learning_rate": 3e-06, | |
| "loss": -13.2942, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.11617777777777778, | |
| "grad_norm": 210.5468292236328, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5768, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.11626666666666667, | |
| "grad_norm": 179.2998046875, | |
| "learning_rate": 3e-06, | |
| "loss": -23.8159, | |
| "step": 1308 | |
| }, | |
| { | |
| "completion_length": 254.5416717529297, | |
| "epoch": 0.11635555555555556, | |
| "grad_norm": 219.9465789794922, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1814, | |
| "reward": 1.2916667461395264, | |
| "reward_std": 0.48936043679714203, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5416666716337204, | |
| "step": 1309, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.11644444444444445, | |
| "grad_norm": 224.42604064941406, | |
| "learning_rate": 3e-06, | |
| "loss": 8.354, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.11653333333333334, | |
| "grad_norm": 262.8431091308594, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5188, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.11662222222222222, | |
| "grad_norm": 446.6980285644531, | |
| "learning_rate": 3e-06, | |
| "loss": 7.2822, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.11671111111111111, | |
| "grad_norm": 200.77650451660156, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5339, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.1168, | |
| "grad_norm": 230.9583740234375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.6372, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.11688888888888889, | |
| "grad_norm": 257.7409362792969, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1944, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.11697777777777778, | |
| "grad_norm": 215.5637664794922, | |
| "learning_rate": 3e-06, | |
| "loss": 6.6842, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.11706666666666667, | |
| "grad_norm": 228.1314697265625, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1597, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.11715555555555555, | |
| "grad_norm": 227.79237365722656, | |
| "learning_rate": 3e-06, | |
| "loss": 6.0615, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.11724444444444444, | |
| "grad_norm": 364.67889404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7295, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 435.36578369140625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3331, | |
| "step": 1320 | |
| }, | |
| { | |
| "completion_length": 254.12500762939453, | |
| "epoch": 0.11742222222222222, | |
| "grad_norm": 181.09559631347656, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7639, | |
| "reward": 1.3437500596046448, | |
| "reward_std": 0.3936076909303665, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.6666666865348816, | |
| "step": 1321, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.11751111111111111, | |
| "grad_norm": 184.9253387451172, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8109, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.1176, | |
| "grad_norm": 178.65553283691406, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5036, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.11768888888888888, | |
| "grad_norm": 172.2812042236328, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0317, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.11777777777777777, | |
| "grad_norm": 189.1818084716797, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0896, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.11786666666666666, | |
| "grad_norm": 195.15562438964844, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7484, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.11795555555555555, | |
| "grad_norm": 180.81617736816406, | |
| "learning_rate": 3e-06, | |
| "loss": 6.0087, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.11804444444444444, | |
| "grad_norm": 181.2151336669922, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1462, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.11813333333333334, | |
| "grad_norm": 232.097900390625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1373, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.11822222222222223, | |
| "grad_norm": 180.55462646484375, | |
| "learning_rate": 3e-06, | |
| "loss": -10.2139, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.11831111111111112, | |
| "grad_norm": 192.82818603515625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.1656, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.1184, | |
| "grad_norm": 190.95399475097656, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1171, | |
| "step": 1332 | |
| }, | |
| { | |
| "completion_length": 254.81250762939453, | |
| "epoch": 0.1184888888888889, | |
| "grad_norm": 65.9253921508789, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6383, | |
| "reward": 0.9791666865348816, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.2916666679084301, | |
| "step": 1333, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.11857777777777778, | |
| "grad_norm": 66.89152526855469, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0904, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.11866666666666667, | |
| "grad_norm": 52.20970916748047, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1923, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.11875555555555556, | |
| "grad_norm": 62.94733810424805, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1938, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.11884444444444445, | |
| "grad_norm": 74.16914367675781, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0211, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.11893333333333334, | |
| "grad_norm": 60.86524963378906, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9824, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.11902222222222222, | |
| "grad_norm": 68.02703857421875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8138, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.11911111111111111, | |
| "grad_norm": 65.81590270996094, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8801, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.1192, | |
| "grad_norm": 49.24789810180664, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9454, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.11928888888888889, | |
| "grad_norm": 66.03591918945312, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1792, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.11937777777777778, | |
| "grad_norm": 68.27337646484375, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5692, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.11946666666666667, | |
| "grad_norm": 76.02365112304688, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3993, | |
| "step": 1344 | |
| }, | |
| { | |
| "completion_length": 242.02084350585938, | |
| "epoch": 0.11955555555555555, | |
| "grad_norm": 159.37770080566406, | |
| "learning_rate": 3e-06, | |
| "loss": -16.5166, | |
| "reward": 1.3125, | |
| "reward_std": 0.43528567254543304, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 1345, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.11964444444444444, | |
| "grad_norm": 156.10609436035156, | |
| "learning_rate": 3e-06, | |
| "loss": -19.2254, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.11973333333333333, | |
| "grad_norm": 159.9058074951172, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4448, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.11982222222222222, | |
| "grad_norm": 217.29722595214844, | |
| "learning_rate": 3e-06, | |
| "loss": -22.2701, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.11991111111111111, | |
| "grad_norm": 220.8108673095703, | |
| "learning_rate": 3e-06, | |
| "loss": -9.9609, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 255.4961395263672, | |
| "learning_rate": 3e-06, | |
| "loss": -10.7417, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.12008888888888888, | |
| "grad_norm": 159.0548858642578, | |
| "learning_rate": 3e-06, | |
| "loss": -18.3206, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.12017777777777777, | |
| "grad_norm": 151.7304229736328, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7915, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.12026666666666666, | |
| "grad_norm": 162.3264923095703, | |
| "learning_rate": 3e-06, | |
| "loss": -24.2052, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.12035555555555555, | |
| "grad_norm": 188.41310119628906, | |
| "learning_rate": 3e-06, | |
| "loss": -26.0542, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.12044444444444445, | |
| "grad_norm": 182.1976776123047, | |
| "learning_rate": 3e-06, | |
| "loss": -14.6291, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.12053333333333334, | |
| "grad_norm": 227.4834747314453, | |
| "learning_rate": 3e-06, | |
| "loss": -16.78, | |
| "step": 1356 | |
| }, | |
| { | |
| "completion_length": 236.93750762939453, | |
| "epoch": 0.12062222222222223, | |
| "grad_norm": 244.82566833496094, | |
| "learning_rate": 3e-06, | |
| "loss": -29.5738, | |
| "reward": 2.125, | |
| "reward_std": 0.39512956142425537, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.375, | |
| "step": 1357, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.12071111111111112, | |
| "grad_norm": 560.8680419921875, | |
| "learning_rate": 3e-06, | |
| "loss": -35.2668, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.1208, | |
| "grad_norm": 303.6029968261719, | |
| "learning_rate": 3e-06, | |
| "loss": -21.3551, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.12088888888888889, | |
| "grad_norm": 247.47055053710938, | |
| "learning_rate": 3e-06, | |
| "loss": -30.3557, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.12097777777777778, | |
| "grad_norm": 311.3307189941406, | |
| "learning_rate": 3e-06, | |
| "loss": -31.387, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.12106666666666667, | |
| "grad_norm": 296.8590087890625, | |
| "learning_rate": 3e-06, | |
| "loss": -33.6326, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.12115555555555556, | |
| "grad_norm": 253.0756378173828, | |
| "learning_rate": 3e-06, | |
| "loss": -31.4888, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.12124444444444445, | |
| "grad_norm": 293.0926513671875, | |
| "learning_rate": 3e-06, | |
| "loss": -38.9989, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.12133333333333333, | |
| "grad_norm": 318.564208984375, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8663, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.12142222222222222, | |
| "grad_norm": 262.5808410644531, | |
| "learning_rate": 3e-06, | |
| "loss": -33.3059, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.12151111111111111, | |
| "grad_norm": 297.9688720703125, | |
| "learning_rate": 3e-06, | |
| "loss": -36.1247, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.1216, | |
| "grad_norm": 319.11968994140625, | |
| "learning_rate": 3e-06, | |
| "loss": -37.8042, | |
| "step": 1368 | |
| }, | |
| { | |
| "completion_length": 241.52083587646484, | |
| "epoch": 0.12168888888888889, | |
| "grad_norm": 149.28683471679688, | |
| "learning_rate": 3e-06, | |
| "loss": -5.2185, | |
| "reward": 0.979166716337204, | |
| "reward_std": 0.306186206638813, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.2916666641831398, | |
| "step": 1369, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.12177777777777778, | |
| "grad_norm": 234.47120666503906, | |
| "learning_rate": 3e-06, | |
| "loss": -0.5601, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.12186666666666666, | |
| "grad_norm": 168.30697631835938, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9666, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.12195555555555555, | |
| "grad_norm": 304.0892333984375, | |
| "learning_rate": 3e-06, | |
| "loss": -11.5806, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.12204444444444444, | |
| "grad_norm": 210.6204071044922, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7489, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.12213333333333333, | |
| "grad_norm": 227.05795288085938, | |
| "learning_rate": 3e-06, | |
| "loss": -19.6276, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.12222222222222222, | |
| "grad_norm": 148.8232879638672, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3126, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.1223111111111111, | |
| "grad_norm": 216.28646850585938, | |
| "learning_rate": 3e-06, | |
| "loss": -4.662, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.1224, | |
| "grad_norm": 196.22518920898438, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8248, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.12248888888888888, | |
| "grad_norm": 238.0521697998047, | |
| "learning_rate": 3e-06, | |
| "loss": -16.9448, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.12257777777777777, | |
| "grad_norm": 195.62860107421875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2301, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.12266666666666666, | |
| "grad_norm": 201.47740173339844, | |
| "learning_rate": 3e-06, | |
| "loss": -26.1931, | |
| "step": 1380 | |
| }, | |
| { | |
| "completion_length": 223.375, | |
| "epoch": 0.12275555555555556, | |
| "grad_norm": 340.7679443359375, | |
| "learning_rate": 3e-06, | |
| "loss": -35.7739, | |
| "reward": 1.5000000596046448, | |
| "reward_std": 0.4779854714870453, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1381, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.12284444444444445, | |
| "grad_norm": 357.4112548828125, | |
| "learning_rate": 3e-06, | |
| "loss": -25.5447, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.12293333333333334, | |
| "grad_norm": 355.23419189453125, | |
| "learning_rate": 3e-06, | |
| "loss": -29.5073, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.12302222222222223, | |
| "grad_norm": 362.6241455078125, | |
| "learning_rate": 3e-06, | |
| "loss": -15.9052, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.12311111111111112, | |
| "grad_norm": 388.32904052734375, | |
| "learning_rate": 3e-06, | |
| "loss": -16.7547, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.1232, | |
| "grad_norm": 398.5157775878906, | |
| "learning_rate": 3e-06, | |
| "loss": -32.0836, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.12328888888888889, | |
| "grad_norm": 256.07763671875, | |
| "learning_rate": 3e-06, | |
| "loss": -36.567, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.12337777777777778, | |
| "grad_norm": 273.55108642578125, | |
| "learning_rate": 3e-06, | |
| "loss": -32.0325, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.12346666666666667, | |
| "grad_norm": 298.35675048828125, | |
| "learning_rate": 3e-06, | |
| "loss": -33.9138, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.12355555555555556, | |
| "grad_norm": 319.2604064941406, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5196, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.12364444444444445, | |
| "grad_norm": 321.7362976074219, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5907, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.12373333333333333, | |
| "grad_norm": 289.5376281738281, | |
| "learning_rate": 3e-06, | |
| "loss": -39.4107, | |
| "step": 1392 | |
| }, | |
| { | |
| "completion_length": 239.6041717529297, | |
| "epoch": 0.12382222222222222, | |
| "grad_norm": 189.32923889160156, | |
| "learning_rate": 3e-06, | |
| "loss": 33.0773, | |
| "reward": 1.3541666865348816, | |
| "reward_std": 0.23899273574352264, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.6666666567325592, | |
| "step": 1393, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.12391111111111111, | |
| "grad_norm": 280.063720703125, | |
| "learning_rate": 3e-06, | |
| "loss": 28.1537, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 239.7598114013672, | |
| "learning_rate": 3e-06, | |
| "loss": 30.4262, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.12408888888888889, | |
| "grad_norm": 230.4679718017578, | |
| "learning_rate": 3e-06, | |
| "loss": 26.9258, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.12417777777777778, | |
| "grad_norm": 241.6261444091797, | |
| "learning_rate": 3e-06, | |
| "loss": 39.8305, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.12426666666666666, | |
| "grad_norm": 305.56097412109375, | |
| "learning_rate": 3e-06, | |
| "loss": 29.9909, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.12435555555555555, | |
| "grad_norm": 190.80581665039062, | |
| "learning_rate": 3e-06, | |
| "loss": 28.9829, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.12444444444444444, | |
| "grad_norm": 272.18365478515625, | |
| "learning_rate": 3e-06, | |
| "loss": 21.7391, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.12453333333333333, | |
| "grad_norm": 236.96322631835938, | |
| "learning_rate": 3e-06, | |
| "loss": 24.1388, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.12462222222222222, | |
| "grad_norm": 257.829833984375, | |
| "learning_rate": 3e-06, | |
| "loss": 19.7315, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.1247111111111111, | |
| "grad_norm": 237.62989807128906, | |
| "learning_rate": 3e-06, | |
| "loss": 32.9331, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.1248, | |
| "grad_norm": 299.9111633300781, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7263, | |
| "step": 1404 | |
| }, | |
| { | |
| "completion_length": 253.62500762939453, | |
| "epoch": 0.12488888888888888, | |
| "grad_norm": 351.87091064453125, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3314, | |
| "reward": 1.5416666865348816, | |
| "reward_std": 0.48936042189598083, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7916666716337204, | |
| "step": 1405, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.12497777777777777, | |
| "grad_norm": 318.410888671875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.872, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.12506666666666666, | |
| "grad_norm": 296.7760925292969, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1205, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.12515555555555555, | |
| "grad_norm": 264.91400146484375, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3195, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.12524444444444444, | |
| "grad_norm": 309.8560485839844, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7835, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.12533333333333332, | |
| "grad_norm": 343.082763671875, | |
| "learning_rate": 3e-06, | |
| "loss": 8.5194, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.1254222222222222, | |
| "grad_norm": 349.1229248046875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.322, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.1255111111111111, | |
| "grad_norm": 343.0054626464844, | |
| "learning_rate": 3e-06, | |
| "loss": -10.4852, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.1256, | |
| "grad_norm": 293.6500549316406, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7367, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.12568888888888888, | |
| "grad_norm": 244.24459838867188, | |
| "learning_rate": 3e-06, | |
| "loss": 12.8548, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.12577777777777777, | |
| "grad_norm": 308.9840393066406, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9646, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.12586666666666665, | |
| "grad_norm": 302.1766052246094, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8047, | |
| "step": 1416 | |
| }, | |
| { | |
| "completion_length": 249.6666717529297, | |
| "epoch": 0.12595555555555554, | |
| "grad_norm": 189.9821319580078, | |
| "learning_rate": 3e-06, | |
| "loss": -21.868, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.4701542258262634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7083333134651184, | |
| "step": 1417, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.12604444444444443, | |
| "grad_norm": 230.71632385253906, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3819, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.12613333333333332, | |
| "grad_norm": 245.25421142578125, | |
| "learning_rate": 3e-06, | |
| "loss": 10.5483, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.12622222222222224, | |
| "grad_norm": 153.20816040039062, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6118, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.12631111111111112, | |
| "grad_norm": 167.58921813964844, | |
| "learning_rate": 3e-06, | |
| "loss": -8.8097, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.1264, | |
| "grad_norm": 190.3168182373047, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9707, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.1264888888888889, | |
| "grad_norm": 191.2883758544922, | |
| "learning_rate": 3e-06, | |
| "loss": -23.6776, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.1265777777777778, | |
| "grad_norm": 223.39498901367188, | |
| "learning_rate": 3e-06, | |
| "loss": -8.999, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.12666666666666668, | |
| "grad_norm": 228.12818908691406, | |
| "learning_rate": 3e-06, | |
| "loss": 7.6836, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.12675555555555557, | |
| "grad_norm": 165.7634735107422, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3289, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.12684444444444445, | |
| "grad_norm": 169.15396118164062, | |
| "learning_rate": 3e-06, | |
| "loss": -13.59, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.12693333333333334, | |
| "grad_norm": 204.25228881835938, | |
| "learning_rate": 3e-06, | |
| "loss": -19.9151, | |
| "step": 1428 | |
| }, | |
| { | |
| "completion_length": 228.02083587646484, | |
| "epoch": 0.12702222222222223, | |
| "grad_norm": 167.8898468017578, | |
| "learning_rate": 3e-06, | |
| "loss": 7.6334, | |
| "reward": 1.2083333432674408, | |
| "reward_std": 0.3602609783411026, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.4583333358168602, | |
| "step": 1429, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.12711111111111112, | |
| "grad_norm": 224.77040100097656, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7483, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.1272, | |
| "grad_norm": 220.69076538085938, | |
| "learning_rate": 3e-06, | |
| "loss": 14.763, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.1272888888888889, | |
| "grad_norm": 150.462646484375, | |
| "learning_rate": 3e-06, | |
| "loss": 17.6324, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.12737777777777778, | |
| "grad_norm": 203.99217224121094, | |
| "learning_rate": 3e-06, | |
| "loss": 17.7821, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.12746666666666667, | |
| "grad_norm": 188.85665893554688, | |
| "learning_rate": 3e-06, | |
| "loss": 13.1598, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.12755555555555556, | |
| "grad_norm": 155.11329650878906, | |
| "learning_rate": 3e-06, | |
| "loss": 6.2584, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.12764444444444445, | |
| "grad_norm": 205.92800903320312, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0899, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.12773333333333334, | |
| "grad_norm": 201.03298950195312, | |
| "learning_rate": 3e-06, | |
| "loss": 12.9818, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.12782222222222223, | |
| "grad_norm": 167.153076171875, | |
| "learning_rate": 3e-06, | |
| "loss": 15.1051, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.12791111111111111, | |
| "grad_norm": 223.22909545898438, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2815, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 173.97694396972656, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4034, | |
| "step": 1440 | |
| }, | |
| { | |
| "completion_length": 239.5416717529297, | |
| "epoch": 0.1280888888888889, | |
| "grad_norm": 299.9499816894531, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7606, | |
| "reward": 1.5208333730697632, | |
| "reward_std": 0.3680921643972397, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8333333432674408, | |
| "step": 1441, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.12817777777777778, | |
| "grad_norm": 236.49612426757812, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7132, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.12826666666666667, | |
| "grad_norm": 186.70510864257812, | |
| "learning_rate": 3e-06, | |
| "loss": -28.2186, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.12835555555555556, | |
| "grad_norm": 294.79656982421875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.5453, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.12844444444444444, | |
| "grad_norm": 257.8788146972656, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3342, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.12853333333333333, | |
| "grad_norm": 280.1625061035156, | |
| "learning_rate": 3e-06, | |
| "loss": -37.5225, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.12862222222222222, | |
| "grad_norm": 257.7731018066406, | |
| "learning_rate": 3e-06, | |
| "loss": -31.1507, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.1287111111111111, | |
| "grad_norm": 221.82879638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -30.7869, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.1288, | |
| "grad_norm": 227.20188903808594, | |
| "learning_rate": 3e-06, | |
| "loss": -31.8637, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.1288888888888889, | |
| "grad_norm": 319.30633544921875, | |
| "learning_rate": 3e-06, | |
| "loss": -24.0296, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.12897777777777777, | |
| "grad_norm": 206.82269287109375, | |
| "learning_rate": 3e-06, | |
| "loss": -28.7864, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.12906666666666666, | |
| "grad_norm": 196.76171875, | |
| "learning_rate": 3e-06, | |
| "loss": -42.0569, | |
| "step": 1452 | |
| }, | |
| { | |
| "completion_length": 248.1666717529297, | |
| "epoch": 0.12915555555555555, | |
| "grad_norm": 210.2032928466797, | |
| "learning_rate": 3e-06, | |
| "loss": -15.8435, | |
| "reward": 1.5000000596046448, | |
| "reward_std": 0.3680921941995621, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1453, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.12924444444444444, | |
| "grad_norm": 209.77688598632812, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2805, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.12933333333333333, | |
| "grad_norm": 662.65966796875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.5908, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.12942222222222222, | |
| "grad_norm": 176.72958374023438, | |
| "learning_rate": 3e-06, | |
| "loss": -15.1466, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.1295111111111111, | |
| "grad_norm": 224.30841064453125, | |
| "learning_rate": 3e-06, | |
| "loss": -23.5471, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.1296, | |
| "grad_norm": 187.04263305664062, | |
| "learning_rate": 3e-06, | |
| "loss": -15.5373, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.12968888888888888, | |
| "grad_norm": 216.18629455566406, | |
| "learning_rate": 3e-06, | |
| "loss": -19.5843, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.12977777777777777, | |
| "grad_norm": 204.7811279296875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6737, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.12986666666666666, | |
| "grad_norm": 357.2877197265625, | |
| "learning_rate": 3e-06, | |
| "loss": -19.0236, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.12995555555555555, | |
| "grad_norm": 173.1217803955078, | |
| "learning_rate": 3e-06, | |
| "loss": -18.2209, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.13004444444444443, | |
| "grad_norm": 202.11126708984375, | |
| "learning_rate": 3e-06, | |
| "loss": -26.1672, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.13013333333333332, | |
| "grad_norm": 215.072265625, | |
| "learning_rate": 3e-06, | |
| "loss": -18.211, | |
| "step": 1464 | |
| }, | |
| { | |
| "completion_length": 248.45833587646484, | |
| "epoch": 0.1302222222222222, | |
| "grad_norm": 189.24551391601562, | |
| "learning_rate": 3e-06, | |
| "loss": -26.8632, | |
| "reward": 1.1041666865348816, | |
| "reward_std": 0.23899272084236145, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.4166666716337204, | |
| "step": 1465, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.1303111111111111, | |
| "grad_norm": 150.01397705078125, | |
| "learning_rate": 3e-06, | |
| "loss": -36.2487, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.1304, | |
| "grad_norm": 239.17184448242188, | |
| "learning_rate": 3e-06, | |
| "loss": -32.3168, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.13048888888888888, | |
| "grad_norm": 214.16525268554688, | |
| "learning_rate": 3e-06, | |
| "loss": -36.6605, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.13057777777777776, | |
| "grad_norm": 200.94650268554688, | |
| "learning_rate": 3e-06, | |
| "loss": -23.324, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.13066666666666665, | |
| "grad_norm": 202.95838928222656, | |
| "learning_rate": 3e-06, | |
| "loss": -34.8067, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.13075555555555557, | |
| "grad_norm": 147.8043670654297, | |
| "learning_rate": 3e-06, | |
| "loss": -29.223, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.13084444444444446, | |
| "grad_norm": 137.1348114013672, | |
| "learning_rate": 3e-06, | |
| "loss": -38.384, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.13093333333333335, | |
| "grad_norm": 198.6035614013672, | |
| "learning_rate": 3e-06, | |
| "loss": -35.8388, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.13102222222222223, | |
| "grad_norm": 225.8579864501953, | |
| "learning_rate": 3e-06, | |
| "loss": -39.2579, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.13111111111111112, | |
| "grad_norm": 193.4779052734375, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8384, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.1312, | |
| "grad_norm": 188.15464782714844, | |
| "learning_rate": 3e-06, | |
| "loss": -37.6788, | |
| "step": 1476 | |
| }, | |
| { | |
| "completion_length": 236.9375, | |
| "epoch": 0.1312888888888889, | |
| "grad_norm": 433.47833251953125, | |
| "learning_rate": 3e-06, | |
| "loss": -44.9082, | |
| "reward": 1.9166667461395264, | |
| "reward_std": 0.6611596345901489, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.1666666865348816, | |
| "step": 1477, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.1313777777777778, | |
| "grad_norm": 327.1938171386719, | |
| "learning_rate": 3e-06, | |
| "loss": -49.2004, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.13146666666666668, | |
| "grad_norm": 295.2423095703125, | |
| "learning_rate": 3e-06, | |
| "loss": -47.3065, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 0.13155555555555556, | |
| "grad_norm": 288.9835510253906, | |
| "learning_rate": 3e-06, | |
| "loss": -35.3959, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.13164444444444445, | |
| "grad_norm": 310.02056884765625, | |
| "learning_rate": 3e-06, | |
| "loss": -36.4699, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 0.13173333333333334, | |
| "grad_norm": 288.0977478027344, | |
| "learning_rate": 3e-06, | |
| "loss": -36.6703, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.13182222222222223, | |
| "grad_norm": 266.6463317871094, | |
| "learning_rate": 3e-06, | |
| "loss": -47.7283, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 0.13191111111111112, | |
| "grad_norm": 405.67071533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -51.2972, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 401.3482360839844, | |
| "learning_rate": 3e-06, | |
| "loss": -52.0379, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.1320888888888889, | |
| "grad_norm": 280.8495788574219, | |
| "learning_rate": 3e-06, | |
| "loss": -39.6706, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.13217777777777778, | |
| "grad_norm": 329.5269775390625, | |
| "learning_rate": 3e-06, | |
| "loss": -41.6526, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 0.13226666666666667, | |
| "grad_norm": 335.015380859375, | |
| "learning_rate": 3e-06, | |
| "loss": -42.414, | |
| "step": 1488 | |
| }, | |
| { | |
| "completion_length": 244.70833587646484, | |
| "epoch": 0.13235555555555556, | |
| "grad_norm": 365.8779602050781, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0487, | |
| "reward": 1.1875, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 1489, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.13244444444444445, | |
| "grad_norm": 270.31494140625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.1951, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.13253333333333334, | |
| "grad_norm": 350.0372619628906, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4081, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.13262222222222222, | |
| "grad_norm": 297.7118835449219, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5965, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.1327111111111111, | |
| "grad_norm": 251.00436401367188, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6016, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 0.1328, | |
| "grad_norm": 364.3514709472656, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7272, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.1328888888888889, | |
| "grad_norm": 327.8075256347656, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1829, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.13297777777777778, | |
| "grad_norm": 816.3570556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -10.451, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.13306666666666667, | |
| "grad_norm": 319.318115234375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2984, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 0.13315555555555555, | |
| "grad_norm": 306.3028259277344, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2124, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.13324444444444444, | |
| "grad_norm": 194.4175567626953, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7213, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 289.50408935546875, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9605, | |
| "step": 1500 | |
| }, | |
| { | |
| "completion_length": 245.8541717529297, | |
| "epoch": 0.13342222222222222, | |
| "grad_norm": 177.177734375, | |
| "learning_rate": 3e-06, | |
| "loss": -19.9228, | |
| "reward": 1.0000000596046448, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1501, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.1335111111111111, | |
| "grad_norm": 256.49334716796875, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3773, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.1336, | |
| "grad_norm": 200.69879150390625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0433, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 0.13368888888888888, | |
| "grad_norm": 195.25538635253906, | |
| "learning_rate": 3e-06, | |
| "loss": -5.3936, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.13377777777777777, | |
| "grad_norm": 179.9781036376953, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4454, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.13386666666666666, | |
| "grad_norm": 255.14865112304688, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0628, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.13395555555555555, | |
| "grad_norm": 185.94732666015625, | |
| "learning_rate": 3e-06, | |
| "loss": -21.484, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 0.13404444444444444, | |
| "grad_norm": 233.72573852539062, | |
| "learning_rate": 3e-06, | |
| "loss": -15.3775, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.13413333333333333, | |
| "grad_norm": 329.163818359375, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9938, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 0.13422222222222221, | |
| "grad_norm": 199.61465454101562, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8012, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.1343111111111111, | |
| "grad_norm": 168.71255493164062, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2907, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 0.1344, | |
| "grad_norm": 246.87896728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4667, | |
| "step": 1512 | |
| }, | |
| { | |
| "completion_length": 240.18750762939453, | |
| "epoch": 0.13448888888888888, | |
| "grad_norm": 273.19427490234375, | |
| "learning_rate": 3e-06, | |
| "loss": -30.8428, | |
| "reward": 1.5625, | |
| "reward_std": 0.3410547822713852, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.875, | |
| "step": 1513, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.13457777777777777, | |
| "grad_norm": 198.8998565673828, | |
| "learning_rate": 3e-06, | |
| "loss": -36.7035, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.13466666666666666, | |
| "grad_norm": 266.1399230957031, | |
| "learning_rate": 3e-06, | |
| "loss": -29.4233, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.13475555555555555, | |
| "grad_norm": 270.2858581542969, | |
| "learning_rate": 3e-06, | |
| "loss": -36.6868, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.13484444444444443, | |
| "grad_norm": 190.39768981933594, | |
| "learning_rate": 3e-06, | |
| "loss": -35.0192, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 0.13493333333333332, | |
| "grad_norm": 319.56494140625, | |
| "learning_rate": 3e-06, | |
| "loss": -35.0648, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.1350222222222222, | |
| "grad_norm": 224.65713500976562, | |
| "learning_rate": 3e-06, | |
| "loss": -35.3229, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.1351111111111111, | |
| "grad_norm": 188.5618438720703, | |
| "learning_rate": 3e-06, | |
| "loss": -41.7633, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.1352, | |
| "grad_norm": 334.20281982421875, | |
| "learning_rate": 3e-06, | |
| "loss": -34.6749, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 0.13528888888888888, | |
| "grad_norm": 232.7653350830078, | |
| "learning_rate": 3e-06, | |
| "loss": -42.387, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.1353777777777778, | |
| "grad_norm": 179.99102783203125, | |
| "learning_rate": 3e-06, | |
| "loss": -40.5993, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 0.13546666666666668, | |
| "grad_norm": 266.2838134765625, | |
| "learning_rate": 3e-06, | |
| "loss": -43.1418, | |
| "step": 1524 | |
| }, | |
| { | |
| "completion_length": 251.06250762939453, | |
| "epoch": 0.13555555555555557, | |
| "grad_norm": 379.1654052734375, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5695, | |
| "reward": 1.0520833730697632, | |
| "reward_std": 0.5148759335279465, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1525, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.13564444444444446, | |
| "grad_norm": 382.55987548828125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.5248, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.13573333333333334, | |
| "grad_norm": 535.1154174804688, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5235, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 0.13582222222222223, | |
| "grad_norm": 382.97515869140625, | |
| "learning_rate": 3e-06, | |
| "loss": 25.0744, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.13591111111111112, | |
| "grad_norm": 417.135009765625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.6026, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 452.19580078125, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3517, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.1360888888888889, | |
| "grad_norm": 404.5818176269531, | |
| "learning_rate": 3e-06, | |
| "loss": 4.4363, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 0.1361777777777778, | |
| "grad_norm": 378.475341796875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4401, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.13626666666666667, | |
| "grad_norm": 446.7852478027344, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1065, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.13635555555555556, | |
| "grad_norm": 313.30340576171875, | |
| "learning_rate": 3e-06, | |
| "loss": 20.5916, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.13644444444444445, | |
| "grad_norm": 385.8117370605469, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1391, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.13653333333333334, | |
| "grad_norm": 381.8157653808594, | |
| "learning_rate": 3e-06, | |
| "loss": -10.1047, | |
| "step": 1536 | |
| }, | |
| { | |
| "completion_length": 251.9166717529297, | |
| "epoch": 0.13662222222222223, | |
| "grad_norm": 151.30177307128906, | |
| "learning_rate": 3e-06, | |
| "loss": 7.8283, | |
| "reward": 1.5, | |
| "reward_std": 0.1369306445121765, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.875, | |
| "step": 1537, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.13671111111111112, | |
| "grad_norm": 152.4805908203125, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9257, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.1368, | |
| "grad_norm": 143.77023315429688, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5204, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 0.1368888888888889, | |
| "grad_norm": 134.1708526611328, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3719, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.13697777777777778, | |
| "grad_norm": 152.87826538085938, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1701, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 0.13706666666666667, | |
| "grad_norm": 140.14395141601562, | |
| "learning_rate": 3e-06, | |
| "loss": 5.3651, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.13715555555555556, | |
| "grad_norm": 126.84275817871094, | |
| "learning_rate": 3e-06, | |
| "loss": 7.1642, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 0.13724444444444445, | |
| "grad_norm": 168.67564392089844, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2961, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.13733333333333334, | |
| "grad_norm": 163.78794860839844, | |
| "learning_rate": 3e-06, | |
| "loss": -0.8864, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.13742222222222222, | |
| "grad_norm": 125.58057403564453, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0556, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.1375111111111111, | |
| "grad_norm": 173.51966857910156, | |
| "learning_rate": 3e-06, | |
| "loss": 3.8643, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 0.1376, | |
| "grad_norm": 120.23555755615234, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9312, | |
| "step": 1548 | |
| }, | |
| { | |
| "completion_length": 254.1041717529297, | |
| "epoch": 0.1376888888888889, | |
| "grad_norm": 355.8426208496094, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5178, | |
| "reward": 1.1250000596046448, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1549, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.13777777777777778, | |
| "grad_norm": 321.2608947753906, | |
| "learning_rate": 3e-06, | |
| "loss": -6.46, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.13786666666666667, | |
| "grad_norm": 220.72544860839844, | |
| "learning_rate": 3e-06, | |
| "loss": -3.6261, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 0.13795555555555555, | |
| "grad_norm": 475.98944091796875, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6756, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.13804444444444444, | |
| "grad_norm": 345.5748291015625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3905, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 0.13813333333333333, | |
| "grad_norm": 285.5007629394531, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0237, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.13822222222222222, | |
| "grad_norm": 395.1439514160156, | |
| "learning_rate": 3e-06, | |
| "loss": -8.0344, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.1383111111111111, | |
| "grad_norm": 300.19091796875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.601, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.1384, | |
| "grad_norm": 285.38763427734375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3983, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 0.13848888888888888, | |
| "grad_norm": 463.34136962890625, | |
| "learning_rate": 3e-06, | |
| "loss": -25.5177, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.13857777777777777, | |
| "grad_norm": 298.12005615234375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.867, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 274.00836181640625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8962, | |
| "step": 1560 | |
| }, | |
| { | |
| "completion_length": 242.33334350585938, | |
| "epoch": 0.13875555555555555, | |
| "grad_norm": 310.84564208984375, | |
| "learning_rate": 3e-06, | |
| "loss": -16.4582, | |
| "reward": 1.541666716337204, | |
| "reward_std": 0.3061862289905548, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7916666492819786, | |
| "step": 1561, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.13884444444444444, | |
| "grad_norm": 398.2083435058594, | |
| "learning_rate": 3e-06, | |
| "loss": -20.4413, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.13893333333333333, | |
| "grad_norm": 378.9441833496094, | |
| "learning_rate": 3e-06, | |
| "loss": -16.1798, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 0.1390222222222222, | |
| "grad_norm": 350.91192626953125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2111, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.1391111111111111, | |
| "grad_norm": 365.54754638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.1017, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.1392, | |
| "grad_norm": 551.0444946289062, | |
| "learning_rate": 3e-06, | |
| "loss": -29.4716, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.13928888888888888, | |
| "grad_norm": 320.9019775390625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.3973, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 0.13937777777777777, | |
| "grad_norm": 456.51190185546875, | |
| "learning_rate": 3e-06, | |
| "loss": -24.394, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.13946666666666666, | |
| "grad_norm": 406.370361328125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.4216, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 0.13955555555555554, | |
| "grad_norm": 356.31982421875, | |
| "learning_rate": 3e-06, | |
| "loss": -24.4687, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.13964444444444443, | |
| "grad_norm": 342.86468505859375, | |
| "learning_rate": 3e-06, | |
| "loss": -23.7611, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 0.13973333333333332, | |
| "grad_norm": 536.0755615234375, | |
| "learning_rate": 3e-06, | |
| "loss": -33.8363, | |
| "step": 1572 | |
| }, | |
| { | |
| "completion_length": 254.18750762939453, | |
| "epoch": 0.1398222222222222, | |
| "grad_norm": 186.11480712890625, | |
| "learning_rate": 3e-06, | |
| "loss": 4.595, | |
| "reward": 0.9791666865348816, | |
| "reward_std": 0.23899271339178085, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.4166666567325592, | |
| "step": 1573, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.13991111111111112, | |
| "grad_norm": 166.36386108398438, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7414, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 213.84388732910156, | |
| "learning_rate": 3e-06, | |
| "loss": 4.949, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.1400888888888889, | |
| "grad_norm": 196.5020294189453, | |
| "learning_rate": 3e-06, | |
| "loss": 8.9449, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.1401777777777778, | |
| "grad_norm": 216.48086547851562, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0276, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 0.14026666666666668, | |
| "grad_norm": 188.523681640625, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1123, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.14035555555555557, | |
| "grad_norm": 223.4918975830078, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6615, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 0.14044444444444446, | |
| "grad_norm": 218.71112060546875, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1185, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.14053333333333334, | |
| "grad_norm": 223.2500762939453, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1539, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 0.14062222222222223, | |
| "grad_norm": 176.69094848632812, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1857, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.14071111111111112, | |
| "grad_norm": 243.36929321289062, | |
| "learning_rate": 3e-06, | |
| "loss": 4.2964, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 0.1408, | |
| "grad_norm": 269.2211608886719, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8612, | |
| "step": 1584 | |
| }, | |
| { | |
| "completion_length": 245.9166717529297, | |
| "epoch": 0.1408888888888889, | |
| "grad_norm": 542.5291137695312, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2794, | |
| "reward": 0.9791666865348816, | |
| "reward_std": 0.5643851011991501, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.2916666716337204, | |
| "step": 1585, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.14097777777777779, | |
| "grad_norm": 575.147705078125, | |
| "learning_rate": 3e-06, | |
| "loss": 17.5223, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.14106666666666667, | |
| "grad_norm": 405.6719970703125, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7247, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 0.14115555555555556, | |
| "grad_norm": 528.659912109375, | |
| "learning_rate": 3e-06, | |
| "loss": 35.2638, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.14124444444444445, | |
| "grad_norm": 514.4658203125, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2266, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 0.14133333333333334, | |
| "grad_norm": 535.8659057617188, | |
| "learning_rate": 3e-06, | |
| "loss": 13.3064, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.14142222222222223, | |
| "grad_norm": 538.135009765625, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7669, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 0.14151111111111112, | |
| "grad_norm": 547.5822143554688, | |
| "learning_rate": 3e-06, | |
| "loss": 13.0588, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.1416, | |
| "grad_norm": 415.9985046386719, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3967, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 0.1416888888888889, | |
| "grad_norm": 553.2412719726562, | |
| "learning_rate": 3e-06, | |
| "loss": 26.3526, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.14177777777777778, | |
| "grad_norm": 512.5444946289062, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7301, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.14186666666666667, | |
| "grad_norm": 580.6036987304688, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7651, | |
| "step": 1596 | |
| }, | |
| { | |
| "completion_length": 235.7916717529297, | |
| "epoch": 0.14195555555555556, | |
| "grad_norm": 347.6015319824219, | |
| "learning_rate": 3e-06, | |
| "loss": -18.3809, | |
| "reward": 1.5416666865348816, | |
| "reward_std": 0.3332235887646675, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.9166666567325592, | |
| "step": 1597, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.14204444444444445, | |
| "grad_norm": 317.97137451171875, | |
| "learning_rate": 3e-06, | |
| "loss": 2.7823, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.14213333333333333, | |
| "grad_norm": 202.9899444580078, | |
| "learning_rate": 3e-06, | |
| "loss": -5.0647, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 0.14222222222222222, | |
| "grad_norm": 786.5781860351562, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8509, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.1423111111111111, | |
| "grad_norm": 290.1077575683594, | |
| "learning_rate": 3e-06, | |
| "loss": -8.7908, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 0.1424, | |
| "grad_norm": 198.83493041992188, | |
| "learning_rate": 3e-06, | |
| "loss": 8.0596, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.1424888888888889, | |
| "grad_norm": 270.3849792480469, | |
| "learning_rate": 3e-06, | |
| "loss": -21.899, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 0.14257777777777778, | |
| "grad_norm": 1309.112060546875, | |
| "learning_rate": 3e-06, | |
| "loss": -1.111, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.14266666666666666, | |
| "grad_norm": 212.12266540527344, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2685, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.14275555555555555, | |
| "grad_norm": 242.00680541992188, | |
| "learning_rate": 3e-06, | |
| "loss": -7.1693, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.14284444444444444, | |
| "grad_norm": 271.86090087890625, | |
| "learning_rate": 3e-06, | |
| "loss": -10.3562, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 0.14293333333333333, | |
| "grad_norm": 233.82144165039062, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7331, | |
| "step": 1608 | |
| }, | |
| { | |
| "completion_length": 254.1666717529297, | |
| "epoch": 0.14302222222222222, | |
| "grad_norm": 183.18118286132812, | |
| "learning_rate": 3e-06, | |
| "loss": -27.8021, | |
| "reward": 1.8958333730697632, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.2083333134651184, | |
| "step": 1609, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.1431111111111111, | |
| "grad_norm": 393.4090270996094, | |
| "learning_rate": 3e-06, | |
| "loss": -29.9682, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.1432, | |
| "grad_norm": 232.8114776611328, | |
| "learning_rate": 3e-06, | |
| "loss": -21.6474, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 0.14328888888888888, | |
| "grad_norm": 304.4367370605469, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4802, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.14337777777777777, | |
| "grad_norm": 162.92181396484375, | |
| "learning_rate": 3e-06, | |
| "loss": -28.1092, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 0.14346666666666666, | |
| "grad_norm": 209.14356994628906, | |
| "learning_rate": 3e-06, | |
| "loss": -19.8933, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.14355555555555555, | |
| "grad_norm": 697.1129760742188, | |
| "learning_rate": 3e-06, | |
| "loss": -27.563, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.14364444444444444, | |
| "grad_norm": 311.41851806640625, | |
| "learning_rate": 3e-06, | |
| "loss": -33.0986, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.14373333333333332, | |
| "grad_norm": 257.729248046875, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3909, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 0.1438222222222222, | |
| "grad_norm": 258.4046936035156, | |
| "learning_rate": 3e-06, | |
| "loss": -26.2488, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.1439111111111111, | |
| "grad_norm": 130.1263885498047, | |
| "learning_rate": 3e-06, | |
| "loss": -30.1948, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 182.67807006835938, | |
| "learning_rate": 3e-06, | |
| "loss": -23.4364, | |
| "step": 1620 | |
| }, | |
| { | |
| "completion_length": 249.4375, | |
| "epoch": 0.14408888888888888, | |
| "grad_norm": 354.3665771484375, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1684, | |
| "reward": 1.3229166865348816, | |
| "reward_std": 0.15461497008800507, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.5833333432674408, | |
| "step": 1621, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.14417777777777777, | |
| "grad_norm": 424.4937438964844, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1374, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.14426666666666665, | |
| "grad_norm": 441.7712097167969, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8721, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 0.14435555555555554, | |
| "grad_norm": 474.1778259277344, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5006, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.14444444444444443, | |
| "grad_norm": 464.2291564941406, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1126, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.14453333333333335, | |
| "grad_norm": 375.41595458984375, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7981, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.14462222222222224, | |
| "grad_norm": 591.2286376953125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7549, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 0.14471111111111112, | |
| "grad_norm": 269.577880859375, | |
| "learning_rate": 3e-06, | |
| "loss": -15.3618, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.1448, | |
| "grad_norm": 366.1959533691406, | |
| "learning_rate": 3e-06, | |
| "loss": -17.4499, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 0.1448888888888889, | |
| "grad_norm": 248.89236450195312, | |
| "learning_rate": 3e-06, | |
| "loss": -18.4987, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.1449777777777778, | |
| "grad_norm": 173.4352569580078, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0213, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 0.14506666666666668, | |
| "grad_norm": 558.5076904296875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.2749, | |
| "step": 1632 | |
| }, | |
| { | |
| "completion_length": 251.20833587646484, | |
| "epoch": 0.14515555555555557, | |
| "grad_norm": 273.303955078125, | |
| "learning_rate": 3e-06, | |
| "loss": -13.4676, | |
| "reward": 1.6145833730697632, | |
| "reward_std": 0.33129163831472397, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6979166567325592, | |
| "rewards/correctness_reward_func_math": 0.9166666865348816, | |
| "step": 1633, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.14524444444444445, | |
| "grad_norm": 275.0503845214844, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2551, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.14533333333333334, | |
| "grad_norm": 399.2369384765625, | |
| "learning_rate": 3e-06, | |
| "loss": -5.5698, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.14542222222222223, | |
| "grad_norm": 328.1588439941406, | |
| "learning_rate": 3e-06, | |
| "loss": -18.8269, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.14551111111111112, | |
| "grad_norm": 239.95028686523438, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0394, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 0.1456, | |
| "grad_norm": 290.9996643066406, | |
| "learning_rate": 3e-06, | |
| "loss": -3.1009, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.1456888888888889, | |
| "grad_norm": 334.3653869628906, | |
| "learning_rate": 3e-06, | |
| "loss": -17.4325, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 0.14577777777777778, | |
| "grad_norm": 301.37139892578125, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3761, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.14586666666666667, | |
| "grad_norm": 593.1119995117188, | |
| "learning_rate": 3e-06, | |
| "loss": -12.5855, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 0.14595555555555556, | |
| "grad_norm": 394.4963073730469, | |
| "learning_rate": 3e-06, | |
| "loss": -21.8609, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.14604444444444445, | |
| "grad_norm": 239.55348205566406, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8962, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 0.14613333333333334, | |
| "grad_norm": 279.3672180175781, | |
| "learning_rate": 3e-06, | |
| "loss": -8.5061, | |
| "step": 1644 | |
| }, | |
| { | |
| "completion_length": 241.9166717529297, | |
| "epoch": 0.14622222222222223, | |
| "grad_norm": 477.223388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8053, | |
| "reward": 1.1875000596046448, | |
| "reward_std": 0.3332235887646675, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.4999999850988388, | |
| "step": 1645, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.14631111111111111, | |
| "grad_norm": 255.73138427734375, | |
| "learning_rate": 3e-06, | |
| "loss": 6.7089, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.1464, | |
| "grad_norm": 414.9920959472656, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6565, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 0.1464888888888889, | |
| "grad_norm": 362.6864318847656, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9471, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.14657777777777778, | |
| "grad_norm": 257.5548400878906, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0843, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 0.14666666666666667, | |
| "grad_norm": 407.9867248535156, | |
| "learning_rate": 3e-06, | |
| "loss": -8.5254, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.14675555555555556, | |
| "grad_norm": 326.8368225097656, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7068, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 0.14684444444444444, | |
| "grad_norm": 222.26805114746094, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1678, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.14693333333333333, | |
| "grad_norm": 361.6373596191406, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3731, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 0.14702222222222222, | |
| "grad_norm": 331.17803955078125, | |
| "learning_rate": 3e-06, | |
| "loss": -9.9171, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.1471111111111111, | |
| "grad_norm": 365.830078125, | |
| "learning_rate": 3e-06, | |
| "loss": 1.295, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.1472, | |
| "grad_norm": 400.97723388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.896, | |
| "step": 1656 | |
| }, | |
| { | |
| "completion_length": 246.3541717529297, | |
| "epoch": 0.14728888888888889, | |
| "grad_norm": 407.52398681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -95.2234, | |
| "reward": 1.4583333730697632, | |
| "reward_std": 0.6184598803520203, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 1657, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.14737777777777777, | |
| "grad_norm": 386.23016357421875, | |
| "learning_rate": 3e-06, | |
| "loss": -94.5723, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.14746666666666666, | |
| "grad_norm": 337.7243347167969, | |
| "learning_rate": 3e-06, | |
| "loss": -78.7432, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 0.14755555555555555, | |
| "grad_norm": 391.17547607421875, | |
| "learning_rate": 3e-06, | |
| "loss": -95.9404, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.14764444444444444, | |
| "grad_norm": 462.5135498046875, | |
| "learning_rate": 3e-06, | |
| "loss": -69.7866, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 0.14773333333333333, | |
| "grad_norm": 326.1936340332031, | |
| "learning_rate": 3e-06, | |
| "loss": -70.0077, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.14782222222222222, | |
| "grad_norm": 417.0813903808594, | |
| "learning_rate": 3e-06, | |
| "loss": -104.9765, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 0.1479111111111111, | |
| "grad_norm": 407.79150390625, | |
| "learning_rate": 3e-06, | |
| "loss": -99.052, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 459.3847961425781, | |
| "learning_rate": 3e-06, | |
| "loss": -89.9644, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.14808888888888888, | |
| "grad_norm": 449.5730285644531, | |
| "learning_rate": 3e-06, | |
| "loss": -106.9796, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.14817777777777777, | |
| "grad_norm": 431.5627746582031, | |
| "learning_rate": 3e-06, | |
| "loss": -78.6433, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 0.14826666666666666, | |
| "grad_norm": 356.53759765625, | |
| "learning_rate": 3e-06, | |
| "loss": -78.1133, | |
| "step": 1668 | |
| }, | |
| { | |
| "completion_length": 250.12500762939453, | |
| "epoch": 0.14835555555555555, | |
| "grad_norm": 798.4279174804688, | |
| "learning_rate": 3e-06, | |
| "loss": 13.9371, | |
| "reward": 1.2500000596046448, | |
| "reward_std": 0.778884083032608, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 1669, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.14844444444444443, | |
| "grad_norm": 674.0550537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -39.024, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.14853333333333332, | |
| "grad_norm": 686.0006103515625, | |
| "learning_rate": 3e-06, | |
| "loss": -26.081, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 0.1486222222222222, | |
| "grad_norm": 854.5956420898438, | |
| "learning_rate": 3e-06, | |
| "loss": -6.9918, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.1487111111111111, | |
| "grad_norm": 588.1673583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -41.4908, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 0.1488, | |
| "grad_norm": 802.58544921875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7048, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.14888888888888888, | |
| "grad_norm": 842.8211669921875, | |
| "learning_rate": 3e-06, | |
| "loss": 3.388, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.14897777777777776, | |
| "grad_norm": 603.185791015625, | |
| "learning_rate": 3e-06, | |
| "loss": -51.6478, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.14906666666666665, | |
| "grad_norm": 667.0884399414062, | |
| "learning_rate": 3e-06, | |
| "loss": -34.1086, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 0.14915555555555557, | |
| "grad_norm": 702.9710693359375, | |
| "learning_rate": 3e-06, | |
| "loss": -14.8077, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.14924444444444446, | |
| "grad_norm": 887.6446533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -53.0034, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 738.0032348632812, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4303, | |
| "step": 1680 | |
| }, | |
| { | |
| "completion_length": 252.8541717529297, | |
| "epoch": 0.14942222222222223, | |
| "grad_norm": 563.926513671875, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8877, | |
| "reward": 1.0625, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1681, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.14951111111111112, | |
| "grad_norm": 571.0492553710938, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3935, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.1496, | |
| "grad_norm": 365.8373107910156, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9182, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 0.1496888888888889, | |
| "grad_norm": 700.759033203125, | |
| "learning_rate": 3e-06, | |
| "loss": 14.5414, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.1497777777777778, | |
| "grad_norm": 600.5524291992188, | |
| "learning_rate": 3e-06, | |
| "loss": 4.631, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.14986666666666668, | |
| "grad_norm": 505.4430847167969, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7184, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.14995555555555556, | |
| "grad_norm": 464.21759033203125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.9671, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 0.15004444444444445, | |
| "grad_norm": 500.7622985839844, | |
| "learning_rate": 3e-06, | |
| "loss": -18.8001, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.15013333333333334, | |
| "grad_norm": 369.3395080566406, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8684, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 0.15022222222222223, | |
| "grad_norm": 881.677001953125, | |
| "learning_rate": 3e-06, | |
| "loss": 6.5321, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.15031111111111112, | |
| "grad_norm": 586.9358520507812, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3023, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 0.1504, | |
| "grad_norm": 406.6219787597656, | |
| "learning_rate": 3e-06, | |
| "loss": -33.3837, | |
| "step": 1692 | |
| }, | |
| { | |
| "completion_length": 250.125, | |
| "epoch": 0.1504888888888889, | |
| "grad_norm": 382.4415283203125, | |
| "learning_rate": 3e-06, | |
| "loss": 19.5614, | |
| "reward": 1.3750000596046448, | |
| "reward_std": 0.3602609932422638, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.625, | |
| "step": 1693, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.15057777777777778, | |
| "grad_norm": 349.4650573730469, | |
| "learning_rate": 3e-06, | |
| "loss": 22.1681, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.15066666666666667, | |
| "grad_norm": 306.9416809082031, | |
| "learning_rate": 3e-06, | |
| "loss": 34.961, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.15075555555555556, | |
| "grad_norm": 353.93121337890625, | |
| "learning_rate": 3e-06, | |
| "loss": 41.0782, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.15084444444444445, | |
| "grad_norm": 213.22996520996094, | |
| "learning_rate": 3e-06, | |
| "loss": 26.4404, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 0.15093333333333334, | |
| "grad_norm": 356.7481994628906, | |
| "learning_rate": 3e-06, | |
| "loss": 16.8735, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.15102222222222222, | |
| "grad_norm": 394.1246643066406, | |
| "learning_rate": 3e-06, | |
| "loss": 15.6199, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 0.1511111111111111, | |
| "grad_norm": 358.08831787109375, | |
| "learning_rate": 3e-06, | |
| "loss": 19.0218, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.1512, | |
| "grad_norm": 290.66485595703125, | |
| "learning_rate": 3e-06, | |
| "loss": 30.4685, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 0.1512888888888889, | |
| "grad_norm": 427.0489501953125, | |
| "learning_rate": 3e-06, | |
| "loss": 35.7429, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.15137777777777778, | |
| "grad_norm": 218.4366912841797, | |
| "learning_rate": 3e-06, | |
| "loss": 22.0305, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 0.15146666666666667, | |
| "grad_norm": 375.6819152832031, | |
| "learning_rate": 3e-06, | |
| "loss": 14.9698, | |
| "step": 1704 | |
| }, | |
| { | |
| "completion_length": 232.77083587646484, | |
| "epoch": 0.15155555555555555, | |
| "grad_norm": 213.29344177246094, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0413, | |
| "reward": 1.4791666865348816, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.7916666567325592, | |
| "step": 1705, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.15164444444444444, | |
| "grad_norm": 168.3350067138672, | |
| "learning_rate": 3e-06, | |
| "loss": -3.6939, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.15173333333333333, | |
| "grad_norm": 159.5379638671875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1446, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 0.15182222222222222, | |
| "grad_norm": 144.53854370117188, | |
| "learning_rate": 3e-06, | |
| "loss": 3.3238, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.1519111111111111, | |
| "grad_norm": 174.06390380859375, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1332, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 124.43144989013672, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2987, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.15208888888888888, | |
| "grad_norm": 242.69232177734375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5975, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 0.15217777777777777, | |
| "grad_norm": 172.77381896972656, | |
| "learning_rate": 3e-06, | |
| "loss": -5.9669, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.15226666666666666, | |
| "grad_norm": 139.244873046875, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5727, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 0.15235555555555555, | |
| "grad_norm": 133.6866455078125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2409, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.15244444444444444, | |
| "grad_norm": 171.1123809814453, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8625, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.15253333333333333, | |
| "grad_norm": 110.9591064453125, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3714, | |
| "step": 1716 | |
| }, | |
| { | |
| "completion_length": 254.12500762939453, | |
| "epoch": 0.15262222222222221, | |
| "grad_norm": 464.7311096191406, | |
| "learning_rate": 3e-06, | |
| "loss": -16.6423, | |
| "reward": 1.2916666865348816, | |
| "reward_std": 0.48936043679714203, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 1717, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.1527111111111111, | |
| "grad_norm": 429.9415588378906, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4782, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.1528, | |
| "grad_norm": 415.4131774902344, | |
| "learning_rate": 3e-06, | |
| "loss": -38.5279, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 0.15288888888888888, | |
| "grad_norm": 589.5310668945312, | |
| "learning_rate": 3e-06, | |
| "loss": -19.3757, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.15297777777777777, | |
| "grad_norm": 492.2614440917969, | |
| "learning_rate": 3e-06, | |
| "loss": -28.3813, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 0.15306666666666666, | |
| "grad_norm": 381.6370849609375, | |
| "learning_rate": 3e-06, | |
| "loss": -23.7667, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.15315555555555554, | |
| "grad_norm": 448.4298095703125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.063, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 0.15324444444444443, | |
| "grad_norm": 351.5326232910156, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6929, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.15333333333333332, | |
| "grad_norm": 951.7434692382812, | |
| "learning_rate": 3e-06, | |
| "loss": -43.7247, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.1534222222222222, | |
| "grad_norm": 457.6904602050781, | |
| "learning_rate": 3e-06, | |
| "loss": -27.8251, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.1535111111111111, | |
| "grad_norm": 433.86907958984375, | |
| "learning_rate": 3e-06, | |
| "loss": -37.9928, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 0.1536, | |
| "grad_norm": 506.2419128417969, | |
| "learning_rate": 3e-06, | |
| "loss": -31.6119, | |
| "step": 1728 | |
| }, | |
| { | |
| "completion_length": 250.25000762939453, | |
| "epoch": 0.1536888888888889, | |
| "grad_norm": 638.6153564453125, | |
| "learning_rate": 3e-06, | |
| "loss": -60.7029, | |
| "reward": 1.4895833730697632, | |
| "reward_std": 0.7616997957229614, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1729, | |
| "zero_std_ratio": 0.125 | |
| }, | |
| { | |
| "epoch": 0.1537777777777778, | |
| "grad_norm": 645.9075927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -100.7913, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.15386666666666668, | |
| "grad_norm": 769.6129760742188, | |
| "learning_rate": 3e-06, | |
| "loss": -41.8559, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 0.15395555555555557, | |
| "grad_norm": 594.3479614257812, | |
| "learning_rate": 3e-06, | |
| "loss": -85.2107, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.15404444444444446, | |
| "grad_norm": 513.4801635742188, | |
| "learning_rate": 3e-06, | |
| "loss": -78.7403, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 0.15413333333333334, | |
| "grad_norm": 596.7926635742188, | |
| "learning_rate": 3e-06, | |
| "loss": -38.3655, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.15422222222222223, | |
| "grad_norm": 762.2822875976562, | |
| "learning_rate": 3e-06, | |
| "loss": -68.4507, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.15431111111111112, | |
| "grad_norm": 507.63958740234375, | |
| "learning_rate": 3e-06, | |
| "loss": -108.803, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.1544, | |
| "grad_norm": 657.9226684570312, | |
| "learning_rate": 3e-06, | |
| "loss": -56.9832, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 0.1544888888888889, | |
| "grad_norm": 604.657958984375, | |
| "learning_rate": 3e-06, | |
| "loss": -96.6592, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.1545777777777778, | |
| "grad_norm": 491.42047119140625, | |
| "learning_rate": 3e-06, | |
| "loss": -87.8488, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 0.15466666666666667, | |
| "grad_norm": 542.5538940429688, | |
| "learning_rate": 3e-06, | |
| "loss": -52.5061, | |
| "step": 1740 | |
| }, | |
| { | |
| "completion_length": 241.3125, | |
| "epoch": 0.15475555555555556, | |
| "grad_norm": 257.529541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -24.5069, | |
| "reward": 1.1250000596046448, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 1741, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.15484444444444445, | |
| "grad_norm": 272.9305725097656, | |
| "learning_rate": 3e-06, | |
| "loss": -31.0682, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.15493333333333334, | |
| "grad_norm": 241.25071716308594, | |
| "learning_rate": 3e-06, | |
| "loss": -32.0895, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 0.15502222222222223, | |
| "grad_norm": 208.89321899414062, | |
| "learning_rate": 3e-06, | |
| "loss": -28.8566, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.15511111111111112, | |
| "grad_norm": 186.21788024902344, | |
| "learning_rate": 3e-06, | |
| "loss": -25.5258, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.1552, | |
| "grad_norm": 208.84288024902344, | |
| "learning_rate": 3e-06, | |
| "loss": -36.6331, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.1552888888888889, | |
| "grad_norm": 273.28900146484375, | |
| "learning_rate": 3e-06, | |
| "loss": -26.8255, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 0.15537777777777778, | |
| "grad_norm": 255.3370361328125, | |
| "learning_rate": 3e-06, | |
| "loss": -35.3528, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.15546666666666667, | |
| "grad_norm": 265.7087097167969, | |
| "learning_rate": 3e-06, | |
| "loss": -37.0957, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 0.15555555555555556, | |
| "grad_norm": 174.1486358642578, | |
| "learning_rate": 3e-06, | |
| "loss": -31.8935, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.15564444444444445, | |
| "grad_norm": 208.65518188476562, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0371, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 0.15573333333333333, | |
| "grad_norm": 172.7078857421875, | |
| "learning_rate": 3e-06, | |
| "loss": -39.3675, | |
| "step": 1752 | |
| }, | |
| { | |
| "completion_length": 252.39583587646484, | |
| "epoch": 0.15582222222222222, | |
| "grad_norm": 451.1358947753906, | |
| "learning_rate": 3e-06, | |
| "loss": -66.2963, | |
| "reward": 1.3541667461395264, | |
| "reward_std": 0.6070848852396011, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.6666666716337204, | |
| "step": 1753, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.1559111111111111, | |
| "grad_norm": 447.5511169433594, | |
| "learning_rate": 3e-06, | |
| "loss": -65.7659, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 481.50335693359375, | |
| "learning_rate": 3e-06, | |
| "loss": -67.9006, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.1560888888888889, | |
| "grad_norm": 418.3782043457031, | |
| "learning_rate": 3e-06, | |
| "loss": -59.6285, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.15617777777777778, | |
| "grad_norm": 487.20574951171875, | |
| "learning_rate": 3e-06, | |
| "loss": -82.0834, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.15626666666666666, | |
| "grad_norm": 454.06463623046875, | |
| "learning_rate": 3e-06, | |
| "loss": -57.3851, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.15635555555555555, | |
| "grad_norm": 408.6988830566406, | |
| "learning_rate": 3e-06, | |
| "loss": -69.432, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 0.15644444444444444, | |
| "grad_norm": 399.9183349609375, | |
| "learning_rate": 3e-06, | |
| "loss": -71.3301, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.15653333333333333, | |
| "grad_norm": 577.1817626953125, | |
| "learning_rate": 3e-06, | |
| "loss": -74.2775, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 0.15662222222222222, | |
| "grad_norm": 413.5326843261719, | |
| "learning_rate": 3e-06, | |
| "loss": -63.2282, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.1567111111111111, | |
| "grad_norm": 498.8305358886719, | |
| "learning_rate": 3e-06, | |
| "loss": -88.1602, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 0.1568, | |
| "grad_norm": 530.10595703125, | |
| "learning_rate": 3e-06, | |
| "loss": -64.1951, | |
| "step": 1764 | |
| }, | |
| { | |
| "completion_length": 247.00000762939453, | |
| "epoch": 0.15688888888888888, | |
| "grad_norm": 433.1192321777344, | |
| "learning_rate": 3e-06, | |
| "loss": -27.1999, | |
| "reward": 1.6458333730697632, | |
| "reward_std": 0.3602609783411026, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 1765, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.15697777777777777, | |
| "grad_norm": 350.172119140625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.7846, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.15706666666666666, | |
| "grad_norm": 443.9689636230469, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8718, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 0.15715555555555555, | |
| "grad_norm": 491.6800537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -21.288, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.15724444444444444, | |
| "grad_norm": 490.1741943359375, | |
| "learning_rate": 3e-06, | |
| "loss": -30.8959, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 0.15733333333333333, | |
| "grad_norm": 403.3340759277344, | |
| "learning_rate": 3e-06, | |
| "loss": -17.7178, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.1574222222222222, | |
| "grad_norm": 434.2870178222656, | |
| "learning_rate": 3e-06, | |
| "loss": -28.6675, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 0.1575111111111111, | |
| "grad_norm": 390.14208984375, | |
| "learning_rate": 3e-06, | |
| "loss": -35.2631, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.1576, | |
| "grad_norm": 545.4449462890625, | |
| "learning_rate": 3e-06, | |
| "loss": -27.2021, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 0.15768888888888888, | |
| "grad_norm": 409.76416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -25.2074, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.15777777777777777, | |
| "grad_norm": 1407.31787109375, | |
| "learning_rate": 3e-06, | |
| "loss": -32.1991, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.15786666666666666, | |
| "grad_norm": 409.153076171875, | |
| "learning_rate": 3e-06, | |
| "loss": -21.3904, | |
| "step": 1776 | |
| }, | |
| { | |
| "completion_length": 255.6666717529297, | |
| "epoch": 0.15795555555555554, | |
| "grad_norm": 250.8477020263672, | |
| "learning_rate": 3e-06, | |
| "loss": -20.1606, | |
| "reward": 1.0625000596046448, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.3750000111758709, | |
| "step": 1777, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.15804444444444443, | |
| "grad_norm": 295.9480895996094, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3906, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.15813333333333332, | |
| "grad_norm": 264.98590087890625, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1149, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 0.1582222222222222, | |
| "grad_norm": 282.2425537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -13.9569, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.15831111111111112, | |
| "grad_norm": 269.65191650390625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0395, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 0.1584, | |
| "grad_norm": 262.14825439453125, | |
| "learning_rate": 3e-06, | |
| "loss": -17.0425, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.1584888888888889, | |
| "grad_norm": 226.9910888671875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3367, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 0.1585777777777778, | |
| "grad_norm": 268.9870300292969, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3806, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.15866666666666668, | |
| "grad_norm": 252.59866333007812, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0587, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.15875555555555557, | |
| "grad_norm": 285.8102111816406, | |
| "learning_rate": 3e-06, | |
| "loss": -17.0219, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.15884444444444445, | |
| "grad_norm": 281.6475830078125, | |
| "learning_rate": 3e-06, | |
| "loss": -16.6806, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 0.15893333333333334, | |
| "grad_norm": 218.4907989501953, | |
| "learning_rate": 3e-06, | |
| "loss": -22.657, | |
| "step": 1788 | |
| }, | |
| { | |
| "completion_length": 252.0625, | |
| "epoch": 0.15902222222222223, | |
| "grad_norm": 365.0342102050781, | |
| "learning_rate": 3e-06, | |
| "loss": 4.7637, | |
| "reward": 1.0416666865348816, | |
| "reward_std": 0.23899271339178085, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.4166666679084301, | |
| "step": 1789, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.15911111111111112, | |
| "grad_norm": 306.0011291503906, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1321, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.1592, | |
| "grad_norm": 385.5239562988281, | |
| "learning_rate": 3e-06, | |
| "loss": -11.9155, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 0.1592888888888889, | |
| "grad_norm": 502.8174743652344, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7161, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.15937777777777778, | |
| "grad_norm": 286.40557861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.1257, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 0.15946666666666667, | |
| "grad_norm": 250.90745544433594, | |
| "learning_rate": 3e-06, | |
| "loss": -5.5968, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.15955555555555556, | |
| "grad_norm": 337.0714416503906, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3961, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.15964444444444445, | |
| "grad_norm": 279.7541198730469, | |
| "learning_rate": 3e-06, | |
| "loss": -15.7637, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.15973333333333334, | |
| "grad_norm": 293.27703857421875, | |
| "learning_rate": 3e-06, | |
| "loss": -16.9533, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 0.15982222222222223, | |
| "grad_norm": 398.4286193847656, | |
| "learning_rate": 3e-06, | |
| "loss": -14.4282, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.15991111111111111, | |
| "grad_norm": 314.15338134765625, | |
| "learning_rate": 3e-06, | |
| "loss": -2.8244, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 302.6134338378906, | |
| "learning_rate": 3e-06, | |
| "loss": -11.5238, | |
| "step": 1800 | |
| }, | |
| { | |
| "completion_length": 251.93750762939453, | |
| "epoch": 0.1600888888888889, | |
| "grad_norm": 598.98095703125, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8615, | |
| "reward": 1.5, | |
| "reward_std": 0.20412415266036987, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1801, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.16017777777777778, | |
| "grad_norm": 618.0178833007812, | |
| "learning_rate": 3e-06, | |
| "loss": -21.1122, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.16026666666666667, | |
| "grad_norm": 541.5601806640625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.0087, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 0.16035555555555556, | |
| "grad_norm": 470.309814453125, | |
| "learning_rate": 3e-06, | |
| "loss": -22.2437, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.16044444444444445, | |
| "grad_norm": 413.4715270996094, | |
| "learning_rate": 3e-06, | |
| "loss": -33.0321, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.16053333333333333, | |
| "grad_norm": 376.2085266113281, | |
| "learning_rate": 3e-06, | |
| "loss": -25.3322, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.16062222222222222, | |
| "grad_norm": 322.1478576660156, | |
| "learning_rate": 3e-06, | |
| "loss": -37.8067, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 0.1607111111111111, | |
| "grad_norm": 272.68951416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -34.8264, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.1608, | |
| "grad_norm": 365.9797058105469, | |
| "learning_rate": 3e-06, | |
| "loss": -24.8187, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 0.1608888888888889, | |
| "grad_norm": 314.506591796875, | |
| "learning_rate": 3e-06, | |
| "loss": -32.5667, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.16097777777777778, | |
| "grad_norm": 410.4781494140625, | |
| "learning_rate": 3e-06, | |
| "loss": -43.7721, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 0.16106666666666666, | |
| "grad_norm": 323.0709228515625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.8399, | |
| "step": 1812 | |
| }, | |
| { | |
| "completion_length": 253.14583587646484, | |
| "epoch": 0.16115555555555555, | |
| "grad_norm": 490.2536315917969, | |
| "learning_rate": 3e-06, | |
| "loss": -24.4492, | |
| "reward": 1.4375000596046448, | |
| "reward_std": 0.3680921420454979, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.7500000149011612, | |
| "step": 1813, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.16124444444444444, | |
| "grad_norm": 376.43231201171875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.8531, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.16133333333333333, | |
| "grad_norm": 370.8981018066406, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1589, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.16142222222222222, | |
| "grad_norm": 350.6385498046875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.2146, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.1615111111111111, | |
| "grad_norm": 413.9906311035156, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1823, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 0.1616, | |
| "grad_norm": 511.4176940917969, | |
| "learning_rate": 3e-06, | |
| "loss": -23.6257, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.16168888888888888, | |
| "grad_norm": 425.5303039550781, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0499, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 0.16177777777777777, | |
| "grad_norm": 329.032958984375, | |
| "learning_rate": 3e-06, | |
| "loss": -28.0294, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.16186666666666666, | |
| "grad_norm": 482.425537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -8.7538, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 0.16195555555555555, | |
| "grad_norm": 422.88494873046875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.7747, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.16204444444444444, | |
| "grad_norm": 394.9844055175781, | |
| "learning_rate": 3e-06, | |
| "loss": -18.7768, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 0.16213333333333332, | |
| "grad_norm": 449.5504455566406, | |
| "learning_rate": 3e-06, | |
| "loss": -25.7761, | |
| "step": 1824 | |
| }, | |
| { | |
| "completion_length": 240.8541717529297, | |
| "epoch": 0.1622222222222222, | |
| "grad_norm": 474.4821472167969, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9184, | |
| "reward": 1.5416666865348816, | |
| "reward_std": 0.3602609857916832, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7916666865348816, | |
| "step": 1825, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.1623111111111111, | |
| "grad_norm": 394.9033203125, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6183, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.1624, | |
| "grad_norm": 585.3305053710938, | |
| "learning_rate": 3e-06, | |
| "loss": 17.2736, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 0.16248888888888888, | |
| "grad_norm": 411.5712585449219, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0057, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.16257777777777777, | |
| "grad_norm": 326.8497619628906, | |
| "learning_rate": 3e-06, | |
| "loss": 5.208, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 0.16266666666666665, | |
| "grad_norm": 621.4788208007812, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1221, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.16275555555555554, | |
| "grad_norm": 628.4518432617188, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4633, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 0.16284444444444446, | |
| "grad_norm": 389.485595703125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.53, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.16293333333333335, | |
| "grad_norm": 760.7333374023438, | |
| "learning_rate": 3e-06, | |
| "loss": 11.3702, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 0.16302222222222224, | |
| "grad_norm": 327.4570617675781, | |
| "learning_rate": 3e-06, | |
| "loss": -4.0738, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.16311111111111112, | |
| "grad_norm": 325.6021728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -0.9882, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.1632, | |
| "grad_norm": 434.9088439941406, | |
| "learning_rate": 3e-06, | |
| "loss": -14.5048, | |
| "step": 1836 | |
| }, | |
| { | |
| "completion_length": 252.3541717529297, | |
| "epoch": 0.1632888888888889, | |
| "grad_norm": 364.32318115234375, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0981, | |
| "reward": 1.520833432674408, | |
| "reward_std": 0.20412413775920868, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8333333544433117, | |
| "step": 1837, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.1633777777777778, | |
| "grad_norm": 327.6516418457031, | |
| "learning_rate": 3e-06, | |
| "loss": 13.1354, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.16346666666666668, | |
| "grad_norm": 327.8387145996094, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7064, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 0.16355555555555557, | |
| "grad_norm": 483.0592346191406, | |
| "learning_rate": 3e-06, | |
| "loss": 9.8312, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.16364444444444445, | |
| "grad_norm": 409.86724853515625, | |
| "learning_rate": 3e-06, | |
| "loss": 13.0568, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 0.16373333333333334, | |
| "grad_norm": 282.73626708984375, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5377, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.16382222222222223, | |
| "grad_norm": 262.7396240234375, | |
| "learning_rate": 3e-06, | |
| "loss": -0.2709, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 0.16391111111111112, | |
| "grad_norm": 375.62359619140625, | |
| "learning_rate": 3e-06, | |
| "loss": 8.4106, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 295.78814697265625, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2675, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.1640888888888889, | |
| "grad_norm": 555.635009765625, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7638, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.16417777777777778, | |
| "grad_norm": 362.09722900390625, | |
| "learning_rate": 3e-06, | |
| "loss": 2.102, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 0.16426666666666667, | |
| "grad_norm": 349.70440673828125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.454, | |
| "step": 1848 | |
| }, | |
| { | |
| "completion_length": 249.08333587646484, | |
| "epoch": 0.16435555555555556, | |
| "grad_norm": 437.3484802246094, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0477, | |
| "reward": 1.4166666865348816, | |
| "reward_std": 0.4701542556285858, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.7916666567325592, | |
| "step": 1849, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.16444444444444445, | |
| "grad_norm": 638.8463745117188, | |
| "learning_rate": 3e-06, | |
| "loss": -47.733, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.16453333333333334, | |
| "grad_norm": 460.33624267578125, | |
| "learning_rate": 3e-06, | |
| "loss": -34.4307, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 0.16462222222222223, | |
| "grad_norm": 491.926025390625, | |
| "learning_rate": 3e-06, | |
| "loss": -52.2731, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.1647111111111111, | |
| "grad_norm": 607.3854370117188, | |
| "learning_rate": 3e-06, | |
| "loss": -47.2157, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 0.1648, | |
| "grad_norm": 512.1332397460938, | |
| "learning_rate": 3e-06, | |
| "loss": -50.0155, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.1648888888888889, | |
| "grad_norm": 403.7186279296875, | |
| "learning_rate": 3e-06, | |
| "loss": -33.8899, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.16497777777777778, | |
| "grad_norm": 423.3454284667969, | |
| "learning_rate": 3e-06, | |
| "loss": -51.1046, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.16506666666666667, | |
| "grad_norm": 615.0731811523438, | |
| "learning_rate": 3e-06, | |
| "loss": -36.3091, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 0.16515555555555556, | |
| "grad_norm": 557.7341918945312, | |
| "learning_rate": 3e-06, | |
| "loss": -58.0938, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.16524444444444444, | |
| "grad_norm": 575.3082885742188, | |
| "learning_rate": 3e-06, | |
| "loss": -56.4051, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 0.16533333333333333, | |
| "grad_norm": 530.2061157226562, | |
| "learning_rate": 3e-06, | |
| "loss": -57.1385, | |
| "step": 1860 | |
| }, | |
| { | |
| "completion_length": 229.4791717529297, | |
| "epoch": 0.16542222222222222, | |
| "grad_norm": 290.0047912597656, | |
| "learning_rate": 3e-06, | |
| "loss": -26.4189, | |
| "reward": 1.4375000596046448, | |
| "reward_std": 0.3332236111164093, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 1861, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.1655111111111111, | |
| "grad_norm": 343.6731262207031, | |
| "learning_rate": 3e-06, | |
| "loss": -30.5266, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.1656, | |
| "grad_norm": 321.04400634765625, | |
| "learning_rate": 3e-06, | |
| "loss": -26.9367, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 0.16568888888888889, | |
| "grad_norm": 423.7695007324219, | |
| "learning_rate": 3e-06, | |
| "loss": -20.3331, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.16577777777777777, | |
| "grad_norm": 301.9991760253906, | |
| "learning_rate": 3e-06, | |
| "loss": -22.0128, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.16586666666666666, | |
| "grad_norm": 266.446533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -13.6451, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.16595555555555555, | |
| "grad_norm": 289.8598937988281, | |
| "learning_rate": 3e-06, | |
| "loss": -29.9844, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 0.16604444444444444, | |
| "grad_norm": 474.86016845703125, | |
| "learning_rate": 3e-06, | |
| "loss": -32.2196, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.16613333333333333, | |
| "grad_norm": 314.1653137207031, | |
| "learning_rate": 3e-06, | |
| "loss": -29.7642, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 0.16622222222222222, | |
| "grad_norm": 362.4693603515625, | |
| "learning_rate": 3e-06, | |
| "loss": -24.4635, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.1663111111111111, | |
| "grad_norm": 312.52569580078125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.009, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 0.1664, | |
| "grad_norm": 259.9459228515625, | |
| "learning_rate": 3e-06, | |
| "loss": -18.4052, | |
| "step": 1872 | |
| }, | |
| { | |
| "completion_length": 250.8541717529297, | |
| "epoch": 0.16648888888888888, | |
| "grad_norm": 374.1226501464844, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5437, | |
| "reward": 1.6666667461395264, | |
| "reward_std": 0.20412414520978928, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.9166666865348816, | |
| "step": 1873, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.16657777777777777, | |
| "grad_norm": 501.4902038574219, | |
| "learning_rate": 3e-06, | |
| "loss": -16.285, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 511.9213562011719, | |
| "learning_rate": 3e-06, | |
| "loss": -5.965, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.16675555555555555, | |
| "grad_norm": 377.8996276855469, | |
| "learning_rate": 3e-06, | |
| "loss": -11.742, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.16684444444444443, | |
| "grad_norm": 275.3067321777344, | |
| "learning_rate": 3e-06, | |
| "loss": -2.8953, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 0.16693333333333332, | |
| "grad_norm": 311.0989990234375, | |
| "learning_rate": 3e-06, | |
| "loss": 3.1096, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.1670222222222222, | |
| "grad_norm": 408.4767761230469, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1197, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 0.1671111111111111, | |
| "grad_norm": 411.1174621582031, | |
| "learning_rate": 3e-06, | |
| "loss": -20.4177, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.1672, | |
| "grad_norm": 432.32159423828125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0135, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 0.16728888888888888, | |
| "grad_norm": 637.0897216796875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.1271, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.16737777777777776, | |
| "grad_norm": 277.6168212890625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.087, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 0.16746666666666668, | |
| "grad_norm": 364.540283203125, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8844, | |
| "step": 1884 | |
| }, | |
| { | |
| "completion_length": 236.375, | |
| "epoch": 0.16755555555555557, | |
| "grad_norm": 518.0326538085938, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3195, | |
| "reward": 1.5208333730697632, | |
| "reward_std": 0.3332235887646675, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8333333283662796, | |
| "step": 1885, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.16764444444444446, | |
| "grad_norm": 477.8092346191406, | |
| "learning_rate": 3e-06, | |
| "loss": 9.9487, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.16773333333333335, | |
| "grad_norm": 573.36083984375, | |
| "learning_rate": 3e-06, | |
| "loss": -5.14, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 0.16782222222222223, | |
| "grad_norm": 552.39599609375, | |
| "learning_rate": 3e-06, | |
| "loss": 8.7078, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.16791111111111112, | |
| "grad_norm": 585.54296875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6238, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 431.30364990234375, | |
| "learning_rate": 3e-06, | |
| "loss": 2.3952, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.1680888888888889, | |
| "grad_norm": 878.414306640625, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0619, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 0.1681777777777778, | |
| "grad_norm": 494.9808044433594, | |
| "learning_rate": 3e-06, | |
| "loss": 5.923, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.16826666666666668, | |
| "grad_norm": 434.52093505859375, | |
| "learning_rate": 3e-06, | |
| "loss": -13.3846, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 0.16835555555555556, | |
| "grad_norm": 355.6122131347656, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8179, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.16844444444444445, | |
| "grad_norm": 613.9844970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.4103, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.16853333333333334, | |
| "grad_norm": 337.2001953125, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9448, | |
| "step": 1896 | |
| }, | |
| { | |
| "completion_length": 253.52084350585938, | |
| "epoch": 0.16862222222222223, | |
| "grad_norm": 439.8132629394531, | |
| "learning_rate": 3e-06, | |
| "loss": -50.7881, | |
| "reward": 1.291666716337204, | |
| "reward_std": 0.5373477265238762, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.6666666716337204, | |
| "step": 1897, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.16871111111111112, | |
| "grad_norm": 414.8184509277344, | |
| "learning_rate": 3e-06, | |
| "loss": -54.9974, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.1688, | |
| "grad_norm": 449.9412536621094, | |
| "learning_rate": 3e-06, | |
| "loss": -85.2961, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 0.1688888888888889, | |
| "grad_norm": 588.9105224609375, | |
| "learning_rate": 3e-06, | |
| "loss": -72.8215, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.16897777777777778, | |
| "grad_norm": 511.78790283203125, | |
| "learning_rate": 3e-06, | |
| "loss": -88.0678, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 0.16906666666666667, | |
| "grad_norm": 514.567138671875, | |
| "learning_rate": 3e-06, | |
| "loss": -90.2521, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.16915555555555556, | |
| "grad_norm": 394.65826416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -56.8307, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 0.16924444444444445, | |
| "grad_norm": 463.7818603515625, | |
| "learning_rate": 3e-06, | |
| "loss": -62.5249, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.16933333333333334, | |
| "grad_norm": 579.9658813476562, | |
| "learning_rate": 3e-06, | |
| "loss": -90.807, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.16942222222222222, | |
| "grad_norm": 578.1275024414062, | |
| "learning_rate": 3e-06, | |
| "loss": -82.3221, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.1695111111111111, | |
| "grad_norm": 405.3597412109375, | |
| "learning_rate": 3e-06, | |
| "loss": -100.0349, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 0.1696, | |
| "grad_norm": 545.5985107421875, | |
| "learning_rate": 3e-06, | |
| "loss": -101.4231, | |
| "step": 1908 | |
| }, | |
| { | |
| "completion_length": 233.64584350585938, | |
| "epoch": 0.1696888888888889, | |
| "grad_norm": 459.9978942871094, | |
| "learning_rate": 3e-06, | |
| "loss": -49.3001, | |
| "reward": 1.7916667461395264, | |
| "reward_std": 0.4701542258262634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.0416666865348816, | |
| "step": 1909, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.16977777777777778, | |
| "grad_norm": 383.19635009765625, | |
| "learning_rate": 3e-06, | |
| "loss": -29.5879, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.16986666666666667, | |
| "grad_norm": 451.4841003417969, | |
| "learning_rate": 3e-06, | |
| "loss": -53.6264, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 0.16995555555555555, | |
| "grad_norm": 429.31640625, | |
| "learning_rate": 3e-06, | |
| "loss": -41.3605, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.17004444444444444, | |
| "grad_norm": 476.54168701171875, | |
| "learning_rate": 3e-06, | |
| "loss": -33.034, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 0.17013333333333333, | |
| "grad_norm": 411.1586608886719, | |
| "learning_rate": 3e-06, | |
| "loss": -33.1554, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.17022222222222222, | |
| "grad_norm": 504.1165771484375, | |
| "learning_rate": 3e-06, | |
| "loss": -54.2275, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.1703111111111111, | |
| "grad_norm": 430.4921569824219, | |
| "learning_rate": 3e-06, | |
| "loss": -36.929, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.1704, | |
| "grad_norm": 488.67071533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -59.2601, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 0.17048888888888888, | |
| "grad_norm": 381.2705078125, | |
| "learning_rate": 3e-06, | |
| "loss": -46.7398, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.17057777777777777, | |
| "grad_norm": 439.42071533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -40.6781, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 468.06365966796875, | |
| "learning_rate": 3e-06, | |
| "loss": -39.7694, | |
| "step": 1920 | |
| }, | |
| { | |
| "completion_length": 248.89583587646484, | |
| "epoch": 0.17075555555555555, | |
| "grad_norm": 389.05523681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4944, | |
| "reward": 1.2291667461395264, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5416666716337204, | |
| "step": 1921, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.17084444444444444, | |
| "grad_norm": 437.8026428222656, | |
| "learning_rate": 3e-06, | |
| "loss": -1.488, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.17093333333333333, | |
| "grad_norm": 251.8334197998047, | |
| "learning_rate": 3e-06, | |
| "loss": -4.1204, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 0.17102222222222221, | |
| "grad_norm": 547.3317260742188, | |
| "learning_rate": 3e-06, | |
| "loss": -28.7346, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.1711111111111111, | |
| "grad_norm": 289.30096435546875, | |
| "learning_rate": 3e-06, | |
| "loss": -16.5796, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.1712, | |
| "grad_norm": 335.68682861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3341, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.17128888888888888, | |
| "grad_norm": 396.2536926269531, | |
| "learning_rate": 3e-06, | |
| "loss": -18.4726, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 0.17137777777777777, | |
| "grad_norm": 490.247802734375, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9601, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.17146666666666666, | |
| "grad_norm": 386.6977844238281, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4419, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 0.17155555555555554, | |
| "grad_norm": 547.1292114257812, | |
| "learning_rate": 3e-06, | |
| "loss": -34.8083, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.17164444444444443, | |
| "grad_norm": 253.44793701171875, | |
| "learning_rate": 3e-06, | |
| "loss": -20.5172, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 0.17173333333333332, | |
| "grad_norm": 536.4385375976562, | |
| "learning_rate": 3e-06, | |
| "loss": -35.105, | |
| "step": 1932 | |
| }, | |
| { | |
| "completion_length": 254.0416717529297, | |
| "epoch": 0.1718222222222222, | |
| "grad_norm": 513.8276977539062, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0418, | |
| "reward": 1.625, | |
| "reward_std": 0.39512956142425537, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.875, | |
| "step": 1933, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.1719111111111111, | |
| "grad_norm": 700.8008422851562, | |
| "learning_rate": 3e-06, | |
| "loss": -24.1725, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 640.3589477539062, | |
| "learning_rate": 3e-06, | |
| "loss": -18.554, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.1720888888888889, | |
| "grad_norm": 507.8769836425781, | |
| "learning_rate": 3e-06, | |
| "loss": -30.509, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.1721777777777778, | |
| "grad_norm": 414.5351257324219, | |
| "learning_rate": 3e-06, | |
| "loss": -39.3267, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 0.17226666666666668, | |
| "grad_norm": 445.7782897949219, | |
| "learning_rate": 3e-06, | |
| "loss": -25.2095, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.17235555555555557, | |
| "grad_norm": 665.8450927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -6.8353, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 0.17244444444444446, | |
| "grad_norm": 558.6971435546875, | |
| "learning_rate": 3e-06, | |
| "loss": -33.1913, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.17253333333333334, | |
| "grad_norm": 500.55841064453125, | |
| "learning_rate": 3e-06, | |
| "loss": -30.2144, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 0.17262222222222223, | |
| "grad_norm": 530.8914184570312, | |
| "learning_rate": 3e-06, | |
| "loss": -38.4873, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.17271111111111112, | |
| "grad_norm": 427.3404846191406, | |
| "learning_rate": 3e-06, | |
| "loss": -45.8584, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 0.1728, | |
| "grad_norm": 391.1449279785156, | |
| "learning_rate": 3e-06, | |
| "loss": -29.2224, | |
| "step": 1944 | |
| }, | |
| { | |
| "completion_length": 255.1041717529297, | |
| "epoch": 0.1728888888888889, | |
| "grad_norm": 763.403076171875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7223, | |
| "reward": 1.8333333730697632, | |
| "reward_std": 0.4971916079521179, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.0833333134651184, | |
| "step": 1945, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.17297777777777779, | |
| "grad_norm": 562.3991088867188, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2214, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.17306666666666667, | |
| "grad_norm": 686.5684814453125, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4798, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 0.17315555555555556, | |
| "grad_norm": 897.48046875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4508, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.17324444444444445, | |
| "grad_norm": 439.28924560546875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.6095, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 0.17333333333333334, | |
| "grad_norm": 501.46044921875, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5293, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.17342222222222223, | |
| "grad_norm": 659.4769287109375, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7485, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 0.17351111111111112, | |
| "grad_norm": 528.5435180664062, | |
| "learning_rate": 3e-06, | |
| "loss": -14.1578, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.1736, | |
| "grad_norm": 827.1624145507812, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8977, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 0.1736888888888889, | |
| "grad_norm": 593.5472412109375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.4025, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.17377777777777778, | |
| "grad_norm": 444.0652160644531, | |
| "learning_rate": 3e-06, | |
| "loss": -18.2012, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.17386666666666667, | |
| "grad_norm": 478.5816345214844, | |
| "learning_rate": 3e-06, | |
| "loss": -13.0243, | |
| "step": 1956 | |
| }, | |
| { | |
| "completion_length": 253.25000762939453, | |
| "epoch": 0.17395555555555556, | |
| "grad_norm": 375.89666748046875, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1653, | |
| "reward": 1.2291666865348816, | |
| "reward_std": 0.10206207633018494, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 1957, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.17404444444444445, | |
| "grad_norm": 902.545654296875, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8535, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.17413333333333333, | |
| "grad_norm": 629.4489135742188, | |
| "learning_rate": 3e-06, | |
| "loss": -22.1681, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 0.17422222222222222, | |
| "grad_norm": 359.41644287109375, | |
| "learning_rate": 3e-06, | |
| "loss": -19.6096, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.1743111111111111, | |
| "grad_norm": 356.7577819824219, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0564, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 0.1744, | |
| "grad_norm": 236.30433654785156, | |
| "learning_rate": 3e-06, | |
| "loss": -25.8836, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.1744888888888889, | |
| "grad_norm": 340.1228332519531, | |
| "learning_rate": 3e-06, | |
| "loss": -14.5898, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 0.17457777777777778, | |
| "grad_norm": 186.7640838623047, | |
| "learning_rate": 3e-06, | |
| "loss": -29.7451, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.17466666666666666, | |
| "grad_norm": 411.6400451660156, | |
| "learning_rate": 3e-06, | |
| "loss": -20.199, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.17475555555555555, | |
| "grad_norm": 319.7075500488281, | |
| "learning_rate": 3e-06, | |
| "loss": -22.8842, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.17484444444444444, | |
| "grad_norm": 417.3407287597656, | |
| "learning_rate": 3e-06, | |
| "loss": -16.5058, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 0.17493333333333333, | |
| "grad_norm": 243.57456970214844, | |
| "learning_rate": 3e-06, | |
| "loss": -31.0802, | |
| "step": 1968 | |
| }, | |
| { | |
| "completion_length": 249.93750762939453, | |
| "epoch": 0.17502222222222222, | |
| "grad_norm": 276.6730041503906, | |
| "learning_rate": 3e-06, | |
| "loss": -29.1006, | |
| "reward": 1.3958333730697632, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.7083333432674408, | |
| "step": 1969, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.1751111111111111, | |
| "grad_norm": 699.74658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0976, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.1752, | |
| "grad_norm": 471.5838623046875, | |
| "learning_rate": 3e-06, | |
| "loss": -20.9071, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 0.17528888888888888, | |
| "grad_norm": 472.9868469238281, | |
| "learning_rate": 3e-06, | |
| "loss": -25.4522, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.17537777777777777, | |
| "grad_norm": 254.69761657714844, | |
| "learning_rate": 3e-06, | |
| "loss": -29.7094, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 0.17546666666666666, | |
| "grad_norm": 248.12869262695312, | |
| "learning_rate": 3e-06, | |
| "loss": -28.2889, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.17555555555555555, | |
| "grad_norm": 236.1974639892578, | |
| "learning_rate": 3e-06, | |
| "loss": -32.1523, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.17564444444444444, | |
| "grad_norm": 648.0961303710938, | |
| "learning_rate": 3e-06, | |
| "loss": -16.4427, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.17573333333333332, | |
| "grad_norm": 378.50634765625, | |
| "learning_rate": 3e-06, | |
| "loss": -27.5418, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 0.1758222222222222, | |
| "grad_norm": 439.670654296875, | |
| "learning_rate": 3e-06, | |
| "loss": -30.8163, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.1759111111111111, | |
| "grad_norm": 216.43161010742188, | |
| "learning_rate": 3e-06, | |
| "loss": -34.2176, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 242.0641632080078, | |
| "learning_rate": 3e-06, | |
| "loss": -31.2334, | |
| "step": 1980 | |
| }, | |
| { | |
| "completion_length": 248.2916717529297, | |
| "epoch": 0.17608888888888888, | |
| "grad_norm": 431.9725646972656, | |
| "learning_rate": 3e-06, | |
| "loss": -90.337, | |
| "reward": 1.0208333730697632, | |
| "reward_std": 0.37592336535453796, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.4583333432674408, | |
| "step": 1981, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.17617777777777777, | |
| "grad_norm": 421.96124267578125, | |
| "learning_rate": 3e-06, | |
| "loss": -74.4792, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.17626666666666665, | |
| "grad_norm": 376.8736877441406, | |
| "learning_rate": 3e-06, | |
| "loss": -66.9567, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 0.17635555555555554, | |
| "grad_norm": 489.1451110839844, | |
| "learning_rate": 3e-06, | |
| "loss": -61.4308, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.17644444444444443, | |
| "grad_norm": 597.60498046875, | |
| "learning_rate": 3e-06, | |
| "loss": -58.1928, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.17653333333333332, | |
| "grad_norm": 466.6503601074219, | |
| "learning_rate": 3e-06, | |
| "loss": -63.9999, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.17662222222222224, | |
| "grad_norm": 420.9534606933594, | |
| "learning_rate": 3e-06, | |
| "loss": -98.6249, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 0.17671111111111112, | |
| "grad_norm": 428.0660400390625, | |
| "learning_rate": 3e-06, | |
| "loss": -80.7811, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.1768, | |
| "grad_norm": 381.4080505371094, | |
| "learning_rate": 3e-06, | |
| "loss": -75.1067, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 0.1768888888888889, | |
| "grad_norm": 451.9751892089844, | |
| "learning_rate": 3e-06, | |
| "loss": -66.6592, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.1769777777777778, | |
| "grad_norm": 526.015625, | |
| "learning_rate": 3e-06, | |
| "loss": -69.5291, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 0.17706666666666668, | |
| "grad_norm": 532.5322265625, | |
| "learning_rate": 3e-06, | |
| "loss": -72.9259, | |
| "step": 1992 | |
| }, | |
| { | |
| "completion_length": 254.4166717529297, | |
| "epoch": 0.17715555555555557, | |
| "grad_norm": 836.5994873046875, | |
| "learning_rate": 3e-06, | |
| "loss": -56.0218, | |
| "reward": 1.0625000298023224, | |
| "reward_std": 0.6184598803520203, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.3749999925494194, | |
| "step": 1993, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.17724444444444445, | |
| "grad_norm": 828.1771240234375, | |
| "learning_rate": 3e-06, | |
| "loss": 7.2773, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.17733333333333334, | |
| "grad_norm": 832.0421752929688, | |
| "learning_rate": 3e-06, | |
| "loss": -26.279, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.17742222222222223, | |
| "grad_norm": 842.8486938476562, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3621, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.17751111111111112, | |
| "grad_norm": 641.175048828125, | |
| "learning_rate": 3e-06, | |
| "loss": -35.8716, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 0.1776, | |
| "grad_norm": 621.6319580078125, | |
| "learning_rate": 3e-06, | |
| "loss": -46.1934, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.1776888888888889, | |
| "grad_norm": 908.200927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -65.1173, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 875.9900512695312, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5691, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17786666666666667, | |
| "grad_norm": 801.6747436523438, | |
| "learning_rate": 3e-06, | |
| "loss": -35.2798, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 0.17795555555555556, | |
| "grad_norm": 942.287353515625, | |
| "learning_rate": 3e-06, | |
| "loss": -34.8283, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.17804444444444445, | |
| "grad_norm": 616.319091796875, | |
| "learning_rate": 3e-06, | |
| "loss": -39.5028, | |
| "step": 2003 | |
| }, | |
| { | |
| "epoch": 0.17813333333333334, | |
| "grad_norm": 901.8914184570312, | |
| "learning_rate": 3e-06, | |
| "loss": -52.1208, | |
| "step": 2004 | |
| }, | |
| { | |
| "completion_length": 248.33333587646484, | |
| "epoch": 0.17822222222222223, | |
| "grad_norm": 706.6128540039062, | |
| "learning_rate": 3e-06, | |
| "loss": -29.2574, | |
| "reward": 1.5416666865348816, | |
| "reward_std": 0.6341222822666168, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7916666865348816, | |
| "step": 2005, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.17831111111111111, | |
| "grad_norm": 781.2979736328125, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4374, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.1784, | |
| "grad_norm": 799.8029174804688, | |
| "learning_rate": 3e-06, | |
| "loss": -62.7724, | |
| "step": 2007 | |
| }, | |
| { | |
| "epoch": 0.1784888888888889, | |
| "grad_norm": 783.2274780273438, | |
| "learning_rate": 3e-06, | |
| "loss": -32.8635, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.17857777777777778, | |
| "grad_norm": 657.0900268554688, | |
| "learning_rate": 3e-06, | |
| "loss": -6.8832, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 0.17866666666666667, | |
| "grad_norm": 736.504638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -34.957, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.17875555555555556, | |
| "grad_norm": 723.53173828125, | |
| "learning_rate": 3e-06, | |
| "loss": -38.0641, | |
| "step": 2011 | |
| }, | |
| { | |
| "epoch": 0.17884444444444444, | |
| "grad_norm": 789.1529541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -14.8114, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.17893333333333333, | |
| "grad_norm": 596.5152587890625, | |
| "learning_rate": 3e-06, | |
| "loss": -74.1531, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 0.17902222222222222, | |
| "grad_norm": 761.294189453125, | |
| "learning_rate": 3e-06, | |
| "loss": -44.1236, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.1791111111111111, | |
| "grad_norm": 669.9348754882812, | |
| "learning_rate": 3e-06, | |
| "loss": -13.3884, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.1792, | |
| "grad_norm": 624.5435180664062, | |
| "learning_rate": 3e-06, | |
| "loss": -43.6932, | |
| "step": 2016 | |
| }, | |
| { | |
| "completion_length": 248.64584350585938, | |
| "epoch": 0.1792888888888889, | |
| "grad_norm": 277.96478271484375, | |
| "learning_rate": 3e-06, | |
| "loss": 27.6711, | |
| "reward": 1.0625, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 2017, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.17937777777777777, | |
| "grad_norm": 346.2447814941406, | |
| "learning_rate": 3e-06, | |
| "loss": 23.3867, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.17946666666666666, | |
| "grad_norm": 250.9003448486328, | |
| "learning_rate": 3e-06, | |
| "loss": 24.0388, | |
| "step": 2019 | |
| }, | |
| { | |
| "epoch": 0.17955555555555555, | |
| "grad_norm": 308.0636291503906, | |
| "learning_rate": 3e-06, | |
| "loss": 22.8199, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.17964444444444444, | |
| "grad_norm": 356.0393371582031, | |
| "learning_rate": 3e-06, | |
| "loss": 17.5979, | |
| "step": 2021 | |
| }, | |
| { | |
| "epoch": 0.17973333333333333, | |
| "grad_norm": 350.8787841796875, | |
| "learning_rate": 3e-06, | |
| "loss": 21.2881, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.17982222222222222, | |
| "grad_norm": 290.56292724609375, | |
| "learning_rate": 3e-06, | |
| "loss": 24.3076, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 0.1799111111111111, | |
| "grad_norm": 291.3890075683594, | |
| "learning_rate": 3e-06, | |
| "loss": 19.0537, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 249.847412109375, | |
| "learning_rate": 3e-06, | |
| "loss": 20.9012, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.18008888888888888, | |
| "grad_norm": 343.4595031738281, | |
| "learning_rate": 3e-06, | |
| "loss": 17.5784, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.18017777777777777, | |
| "grad_norm": 551.070068359375, | |
| "learning_rate": 3e-06, | |
| "loss": 11.5671, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 0.18026666666666666, | |
| "grad_norm": 353.6826171875, | |
| "learning_rate": 3e-06, | |
| "loss": 13.8267, | |
| "step": 2028 | |
| }, | |
| { | |
| "completion_length": 252.8541717529297, | |
| "epoch": 0.18035555555555555, | |
| "grad_norm": 691.8173828125, | |
| "learning_rate": 3e-06, | |
| "loss": 19.1562, | |
| "reward": 1.3958333730697632, | |
| "reward_std": 0.505022794008255, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.7083333134651184, | |
| "step": 2029, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.18044444444444444, | |
| "grad_norm": 769.9490356445312, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2182, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.18053333333333332, | |
| "grad_norm": 746.6094360351562, | |
| "learning_rate": 3e-06, | |
| "loss": 26.2743, | |
| "step": 2031 | |
| }, | |
| { | |
| "epoch": 0.1806222222222222, | |
| "grad_norm": 538.3868408203125, | |
| "learning_rate": 3e-06, | |
| "loss": 41.1944, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.1807111111111111, | |
| "grad_norm": 1304.790771484375, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3747, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 0.1808, | |
| "grad_norm": 551.8775634765625, | |
| "learning_rate": 3e-06, | |
| "loss": 18.321, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.18088888888888888, | |
| "grad_norm": 639.1527709960938, | |
| "learning_rate": 3e-06, | |
| "loss": 9.5878, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.18097777777777777, | |
| "grad_norm": 659.2324829101562, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4579, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.18106666666666665, | |
| "grad_norm": 709.0194702148438, | |
| "learning_rate": 3e-06, | |
| "loss": 14.4976, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 0.18115555555555554, | |
| "grad_norm": 522.2162475585938, | |
| "learning_rate": 3e-06, | |
| "loss": 31.9036, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.18124444444444446, | |
| "grad_norm": 605.5569458007812, | |
| "learning_rate": 3e-06, | |
| "loss": -16.3849, | |
| "step": 2039 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 586.4031982421875, | |
| "learning_rate": 3e-06, | |
| "loss": 7.3009, | |
| "step": 2040 | |
| }, | |
| { | |
| "completion_length": 255.25, | |
| "epoch": 0.18142222222222223, | |
| "grad_norm": 294.46368408203125, | |
| "learning_rate": 3e-06, | |
| "loss": -46.1645, | |
| "reward": 1.1666666865348816, | |
| "reward_std": 0.26603010296821594, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 2041, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.18151111111111112, | |
| "grad_norm": 281.5460510253906, | |
| "learning_rate": 3e-06, | |
| "loss": -45.9692, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.1816, | |
| "grad_norm": 426.70758056640625, | |
| "learning_rate": 3e-06, | |
| "loss": -59.0595, | |
| "step": 2043 | |
| }, | |
| { | |
| "epoch": 0.1816888888888889, | |
| "grad_norm": 379.1589660644531, | |
| "learning_rate": 3e-06, | |
| "loss": -48.9399, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.1817777777777778, | |
| "grad_norm": 372.3189697265625, | |
| "learning_rate": 3e-06, | |
| "loss": -59.5334, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.18186666666666668, | |
| "grad_norm": 316.384765625, | |
| "learning_rate": 3e-06, | |
| "loss": -42.7701, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.18195555555555556, | |
| "grad_norm": 333.133056640625, | |
| "learning_rate": 3e-06, | |
| "loss": -52.4876, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 0.18204444444444445, | |
| "grad_norm": 302.69488525390625, | |
| "learning_rate": 3e-06, | |
| "loss": -50.558, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.18213333333333334, | |
| "grad_norm": 297.52264404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -65.7042, | |
| "step": 2049 | |
| }, | |
| { | |
| "epoch": 0.18222222222222223, | |
| "grad_norm": 390.93719482421875, | |
| "learning_rate": 3e-06, | |
| "loss": -56.6311, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.18231111111111112, | |
| "grad_norm": 510.16064453125, | |
| "learning_rate": 3e-06, | |
| "loss": -66.4786, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 0.1824, | |
| "grad_norm": 386.4976501464844, | |
| "learning_rate": 3e-06, | |
| "loss": -55.7261, | |
| "step": 2052 | |
| }, | |
| { | |
| "completion_length": 231.77083587646484, | |
| "epoch": 0.1824888888888889, | |
| "grad_norm": 214.66683959960938, | |
| "learning_rate": 3e-06, | |
| "loss": -13.3666, | |
| "reward": 1.0208333730697632, | |
| "reward_std": 0.10206207633018494, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.4583333283662796, | |
| "step": 2053, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.18257777777777778, | |
| "grad_norm": 241.9335174560547, | |
| "learning_rate": 3e-06, | |
| "loss": -16.4601, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.18266666666666667, | |
| "grad_norm": 227.05535888671875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4358, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.18275555555555556, | |
| "grad_norm": 280.6861267089844, | |
| "learning_rate": 3e-06, | |
| "loss": -19.7707, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.18284444444444445, | |
| "grad_norm": 211.3414306640625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4767, | |
| "step": 2057 | |
| }, | |
| { | |
| "epoch": 0.18293333333333334, | |
| "grad_norm": 332.11248779296875, | |
| "learning_rate": 3e-06, | |
| "loss": -11.748, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.18302222222222223, | |
| "grad_norm": 210.33470153808594, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0377, | |
| "step": 2059 | |
| }, | |
| { | |
| "epoch": 0.1831111111111111, | |
| "grad_norm": 230.14593505859375, | |
| "learning_rate": 3e-06, | |
| "loss": -19.6348, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.1832, | |
| "grad_norm": 215.13331604003906, | |
| "learning_rate": 3e-06, | |
| "loss": -19.3665, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 0.1832888888888889, | |
| "grad_norm": 401.6134338378906, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9875, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.18337777777777778, | |
| "grad_norm": 223.34193420410156, | |
| "learning_rate": 3e-06, | |
| "loss": -16.2301, | |
| "step": 2063 | |
| }, | |
| { | |
| "epoch": 0.18346666666666667, | |
| "grad_norm": 241.48159790039062, | |
| "learning_rate": 3e-06, | |
| "loss": -17.0173, | |
| "step": 2064 | |
| }, | |
| { | |
| "completion_length": 250.12500762939453, | |
| "epoch": 0.18355555555555556, | |
| "grad_norm": 783.0624389648438, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0804, | |
| "reward": 1.2500000596046448, | |
| "reward_std": 0.3332235962152481, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5000000111758709, | |
| "step": 2065, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.18364444444444444, | |
| "grad_norm": 456.5368957519531, | |
| "learning_rate": 3e-06, | |
| "loss": -5.7073, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.18373333333333333, | |
| "grad_norm": 801.0426635742188, | |
| "learning_rate": 3e-06, | |
| "loss": 24.9016, | |
| "step": 2067 | |
| }, | |
| { | |
| "epoch": 0.18382222222222222, | |
| "grad_norm": 452.8072204589844, | |
| "learning_rate": 3e-06, | |
| "loss": -16.3536, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.1839111111111111, | |
| "grad_norm": 573.7277221679688, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2969, | |
| "step": 2069 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 490.78375244140625, | |
| "learning_rate": 3e-06, | |
| "loss": -29.7209, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.18408888888888889, | |
| "grad_norm": 967.42578125, | |
| "learning_rate": 3e-06, | |
| "loss": -11.7546, | |
| "step": 2071 | |
| }, | |
| { | |
| "epoch": 0.18417777777777777, | |
| "grad_norm": 446.62945556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.8012, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.18426666666666666, | |
| "grad_norm": 586.0079345703125, | |
| "learning_rate": 3e-06, | |
| "loss": 16.1318, | |
| "step": 2073 | |
| }, | |
| { | |
| "epoch": 0.18435555555555555, | |
| "grad_norm": 483.7483825683594, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5055, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.18444444444444444, | |
| "grad_norm": 552.7943725585938, | |
| "learning_rate": 3e-06, | |
| "loss": -23.8956, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.18453333333333333, | |
| "grad_norm": 516.2462768554688, | |
| "learning_rate": 3e-06, | |
| "loss": -36.5617, | |
| "step": 2076 | |
| }, | |
| { | |
| "completion_length": 245.45833587646484, | |
| "epoch": 0.18462222222222222, | |
| "grad_norm": 526.9989013671875, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3188, | |
| "reward": 1.8541667461395264, | |
| "reward_std": 0.3872983753681183, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.1666666269302368, | |
| "step": 2077, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.1847111111111111, | |
| "grad_norm": 695.4895629882812, | |
| "learning_rate": 3e-06, | |
| "loss": -7.7195, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.1848, | |
| "grad_norm": 732.2682495117188, | |
| "learning_rate": 3e-06, | |
| "loss": -3.051, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 0.18488888888888888, | |
| "grad_norm": 717.4251098632812, | |
| "learning_rate": 3e-06, | |
| "loss": 6.4156, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.18497777777777777, | |
| "grad_norm": 588.271484375, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8704, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 0.18506666666666666, | |
| "grad_norm": 563.7857055664062, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8266, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.18515555555555555, | |
| "grad_norm": 523.8809814453125, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2269, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 0.18524444444444443, | |
| "grad_norm": 566.2587890625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.1423, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.18533333333333332, | |
| "grad_norm": 575.639892578125, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9986, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.1854222222222222, | |
| "grad_norm": 607.2640380859375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5698, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.1855111111111111, | |
| "grad_norm": 685.4660034179688, | |
| "learning_rate": 3e-06, | |
| "loss": -13.8418, | |
| "step": 2087 | |
| }, | |
| { | |
| "epoch": 0.1856, | |
| "grad_norm": 1059.2657470703125, | |
| "learning_rate": 3e-06, | |
| "loss": -3.093, | |
| "step": 2088 | |
| }, | |
| { | |
| "completion_length": 243.375, | |
| "epoch": 0.18568888888888888, | |
| "grad_norm": 533.3504638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.4458, | |
| "reward": 1.7916667461395264, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.0416666567325592, | |
| "step": 2089, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.18577777777777776, | |
| "grad_norm": 466.76678466796875, | |
| "learning_rate": 3e-06, | |
| "loss": -25.5447, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.18586666666666668, | |
| "grad_norm": 482.4854736328125, | |
| "learning_rate": 3e-06, | |
| "loss": -11.9751, | |
| "step": 2091 | |
| }, | |
| { | |
| "epoch": 0.18595555555555557, | |
| "grad_norm": 617.836669921875, | |
| "learning_rate": 3e-06, | |
| "loss": -16.149, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.18604444444444446, | |
| "grad_norm": 806.7719116210938, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1531, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 0.18613333333333335, | |
| "grad_norm": 436.9642333984375, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6401, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.18622222222222223, | |
| "grad_norm": 533.0576171875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.39, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.18631111111111112, | |
| "grad_norm": 578.9844360351562, | |
| "learning_rate": 3e-06, | |
| "loss": -28.3945, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.1864, | |
| "grad_norm": 518.856201171875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.442, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 0.1864888888888889, | |
| "grad_norm": 681.1263427734375, | |
| "learning_rate": 3e-06, | |
| "loss": -22.9392, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.1865777777777778, | |
| "grad_norm": 759.8504028320312, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4906, | |
| "step": 2099 | |
| }, | |
| { | |
| "epoch": 0.18666666666666668, | |
| "grad_norm": 497.5691833496094, | |
| "learning_rate": 3e-06, | |
| "loss": -25.688, | |
| "step": 2100 | |
| }, | |
| { | |
| "completion_length": 247.7291717529297, | |
| "epoch": 0.18675555555555556, | |
| "grad_norm": 766.0548706054688, | |
| "learning_rate": 3e-06, | |
| "loss": 9.0542, | |
| "reward": 1.145833358168602, | |
| "reward_std": 0.20412415266036987, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.5625, | |
| "rewards/correctness_reward_func_math": 0.5833333544433117, | |
| "step": 2101, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.18684444444444445, | |
| "grad_norm": 274.5552062988281, | |
| "learning_rate": 3e-06, | |
| "loss": -14.4338, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.18693333333333334, | |
| "grad_norm": 357.243896484375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3275, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 0.18702222222222223, | |
| "grad_norm": 349.6654968261719, | |
| "learning_rate": 3e-06, | |
| "loss": -3.714, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.18711111111111112, | |
| "grad_norm": 252.651611328125, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8234, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.1872, | |
| "grad_norm": 422.1712951660156, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0141, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.1872888888888889, | |
| "grad_norm": 848.9307250976562, | |
| "learning_rate": 3e-06, | |
| "loss": 1.1462, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 0.18737777777777778, | |
| "grad_norm": 255.5582275390625, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6895, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.18746666666666667, | |
| "grad_norm": 307.6992492675781, | |
| "learning_rate": 3e-06, | |
| "loss": -9.5895, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 0.18755555555555556, | |
| "grad_norm": 277.8653259277344, | |
| "learning_rate": 3e-06, | |
| "loss": -9.602, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.18764444444444445, | |
| "grad_norm": 234.34913635253906, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8899, | |
| "step": 2111 | |
| }, | |
| { | |
| "epoch": 0.18773333333333334, | |
| "grad_norm": 355.8470764160156, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6677, | |
| "step": 2112 | |
| }, | |
| { | |
| "completion_length": 250.89583587646484, | |
| "epoch": 0.18782222222222222, | |
| "grad_norm": 810.5846557617188, | |
| "learning_rate": 3e-06, | |
| "loss": 15.7377, | |
| "reward": 1.125, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.375, | |
| "step": 2113, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.1879111111111111, | |
| "grad_norm": 509.224853515625, | |
| "learning_rate": 3e-06, | |
| "loss": 18.5617, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 530.5845336914062, | |
| "learning_rate": 3e-06, | |
| "loss": 7.082, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.1880888888888889, | |
| "grad_norm": 672.1024169921875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3842, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.18817777777777778, | |
| "grad_norm": 408.4438171386719, | |
| "learning_rate": 3e-06, | |
| "loss": 16.7967, | |
| "step": 2117 | |
| }, | |
| { | |
| "epoch": 0.18826666666666667, | |
| "grad_norm": 569.5597534179688, | |
| "learning_rate": 3e-06, | |
| "loss": 15.6526, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.18835555555555555, | |
| "grad_norm": 613.7771606445312, | |
| "learning_rate": 3e-06, | |
| "loss": 11.1284, | |
| "step": 2119 | |
| }, | |
| { | |
| "epoch": 0.18844444444444444, | |
| "grad_norm": 443.5073547363281, | |
| "learning_rate": 3e-06, | |
| "loss": 13.1326, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.18853333333333333, | |
| "grad_norm": 745.5543823242188, | |
| "learning_rate": 3e-06, | |
| "loss": 3.177, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 0.18862222222222222, | |
| "grad_norm": 776.3263549804688, | |
| "learning_rate": 3e-06, | |
| "loss": -4.0693, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.1887111111111111, | |
| "grad_norm": 439.9760437011719, | |
| "learning_rate": 3e-06, | |
| "loss": 14.0792, | |
| "step": 2123 | |
| }, | |
| { | |
| "epoch": 0.1888, | |
| "grad_norm": 620.5515747070312, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3075, | |
| "step": 2124 | |
| }, | |
| { | |
| "completion_length": 255.89583587646484, | |
| "epoch": 0.18888888888888888, | |
| "grad_norm": 1261.142578125, | |
| "learning_rate": 3e-06, | |
| "loss": -52.8242, | |
| "reward": 1.5416666865348816, | |
| "reward_std": 0.7283531129360199, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.7916666567325592, | |
| "step": 2125, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.18897777777777777, | |
| "grad_norm": 828.8499755859375, | |
| "learning_rate": 3e-06, | |
| "loss": -18.1417, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.18906666666666666, | |
| "grad_norm": 921.6665649414062, | |
| "learning_rate": 3e-06, | |
| "loss": -28.653, | |
| "step": 2127 | |
| }, | |
| { | |
| "epoch": 0.18915555555555555, | |
| "grad_norm": 1188.288330078125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.6245, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.18924444444444444, | |
| "grad_norm": 875.7568969726562, | |
| "learning_rate": 3e-06, | |
| "loss": -19.076, | |
| "step": 2129 | |
| }, | |
| { | |
| "epoch": 0.18933333333333333, | |
| "grad_norm": 935.9678955078125, | |
| "learning_rate": 3e-06, | |
| "loss": -30.5958, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.18942222222222221, | |
| "grad_norm": 933.6688842773438, | |
| "learning_rate": 3e-06, | |
| "loss": -56.0788, | |
| "step": 2131 | |
| }, | |
| { | |
| "epoch": 0.1895111111111111, | |
| "grad_norm": 1004.11572265625, | |
| "learning_rate": 3e-06, | |
| "loss": -27.5338, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.1896, | |
| "grad_norm": 805.9441528320312, | |
| "learning_rate": 3e-06, | |
| "loss": -38.4037, | |
| "step": 2133 | |
| }, | |
| { | |
| "epoch": 0.18968888888888888, | |
| "grad_norm": 1125.8046875, | |
| "learning_rate": 3e-06, | |
| "loss": -21.7139, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.18977777777777777, | |
| "grad_norm": 892.211181640625, | |
| "learning_rate": 3e-06, | |
| "loss": -29.3145, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.18986666666666666, | |
| "grad_norm": 895.8474731445312, | |
| "learning_rate": 3e-06, | |
| "loss": -38.8515, | |
| "step": 2136 | |
| }, | |
| { | |
| "completion_length": 241.06250762939453, | |
| "epoch": 0.18995555555555554, | |
| "grad_norm": 587.2543334960938, | |
| "learning_rate": 3e-06, | |
| "loss": -30.24, | |
| "reward": 1.7083333730697632, | |
| "reward_std": 0.5128540322184563, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 1.0833333134651184, | |
| "step": 2137, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.19004444444444443, | |
| "grad_norm": 483.7323913574219, | |
| "learning_rate": 3e-06, | |
| "loss": -54.7314, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.19013333333333332, | |
| "grad_norm": 695.5252075195312, | |
| "learning_rate": 3e-06, | |
| "loss": -37.2405, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 0.1902222222222222, | |
| "grad_norm": 659.273681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -54.6989, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.1903111111111111, | |
| "grad_norm": 665.203857421875, | |
| "learning_rate": 3e-06, | |
| "loss": -49.4977, | |
| "step": 2141 | |
| }, | |
| { | |
| "epoch": 0.1904, | |
| "grad_norm": 570.3987426757812, | |
| "learning_rate": 3e-06, | |
| "loss": -29.7209, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.1904888888888889, | |
| "grad_norm": 543.2655029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -35.8428, | |
| "step": 2143 | |
| }, | |
| { | |
| "epoch": 0.1905777777777778, | |
| "grad_norm": 810.827880859375, | |
| "learning_rate": 3e-06, | |
| "loss": -59.7728, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.19066666666666668, | |
| "grad_norm": 702.2298583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -49.0496, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.19075555555555557, | |
| "grad_norm": 575.1386108398438, | |
| "learning_rate": 3e-06, | |
| "loss": -64.258, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.19084444444444446, | |
| "grad_norm": 646.1061401367188, | |
| "learning_rate": 3e-06, | |
| "loss": -60.6597, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 0.19093333333333334, | |
| "grad_norm": 583.6048583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -42.0407, | |
| "step": 2148 | |
| }, | |
| { | |
| "completion_length": 230.43750762939453, | |
| "epoch": 0.19102222222222223, | |
| "grad_norm": 553.2525024414062, | |
| "learning_rate": 3e-06, | |
| "loss": 31.5824, | |
| "reward": 1.2083333730697632, | |
| "reward_std": 0.46232303977012634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.5833333283662796, | |
| "step": 2149, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.19111111111111112, | |
| "grad_norm": 672.8676147460938, | |
| "learning_rate": 3e-06, | |
| "loss": 26.2286, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.1912, | |
| "grad_norm": 594.0144653320312, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6153, | |
| "step": 2151 | |
| }, | |
| { | |
| "epoch": 0.1912888888888889, | |
| "grad_norm": 828.8351440429688, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5627, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.19137777777777779, | |
| "grad_norm": 601.149658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -1.2538, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 0.19146666666666667, | |
| "grad_norm": 685.7401123046875, | |
| "learning_rate": 3e-06, | |
| "loss": 8.7495, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.19155555555555556, | |
| "grad_norm": 561.976318359375, | |
| "learning_rate": 3e-06, | |
| "loss": 22.0952, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.19164444444444445, | |
| "grad_norm": 583.328369140625, | |
| "learning_rate": 3e-06, | |
| "loss": 15.3746, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.19173333333333334, | |
| "grad_norm": 495.55609130859375, | |
| "learning_rate": 3e-06, | |
| "loss": -9.5814, | |
| "step": 2157 | |
| }, | |
| { | |
| "epoch": 0.19182222222222223, | |
| "grad_norm": 764.2197265625, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7963, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.19191111111111112, | |
| "grad_norm": 2350.23779296875, | |
| "learning_rate": 3e-06, | |
| "loss": -8.0366, | |
| "step": 2159 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 904.4937744140625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0055, | |
| "step": 2160 | |
| }, | |
| { | |
| "completion_length": 247.93750762939453, | |
| "epoch": 0.1920888888888889, | |
| "grad_norm": 899.4290771484375, | |
| "learning_rate": 3e-06, | |
| "loss": 51.9844, | |
| "reward": 1.5625000596046448, | |
| "reward_std": 0.5050228163599968, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8749999701976776, | |
| "step": 2161, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.19217777777777778, | |
| "grad_norm": 936.0744018554688, | |
| "learning_rate": 3e-06, | |
| "loss": 24.5592, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.19226666666666667, | |
| "grad_norm": 865.0958251953125, | |
| "learning_rate": 3e-06, | |
| "loss": 4.2507, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 0.19235555555555556, | |
| "grad_norm": 819.485107421875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.4293, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.19244444444444445, | |
| "grad_norm": 823.7230224609375, | |
| "learning_rate": 3e-06, | |
| "loss": -19.1261, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.19253333333333333, | |
| "grad_norm": 1002.2861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -19.0886, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.19262222222222222, | |
| "grad_norm": 926.2667236328125, | |
| "learning_rate": 3e-06, | |
| "loss": 38.8715, | |
| "step": 2167 | |
| }, | |
| { | |
| "epoch": 0.1927111111111111, | |
| "grad_norm": 890.3988037109375, | |
| "learning_rate": 3e-06, | |
| "loss": 16.1366, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.1928, | |
| "grad_norm": 863.7974853515625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0243, | |
| "step": 2169 | |
| }, | |
| { | |
| "epoch": 0.1928888888888889, | |
| "grad_norm": 675.4772338867188, | |
| "learning_rate": 3e-06, | |
| "loss": -29.941, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.19297777777777778, | |
| "grad_norm": 795.9426879882812, | |
| "learning_rate": 3e-06, | |
| "loss": -37.6626, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 0.19306666666666666, | |
| "grad_norm": 1173.4658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -42.7799, | |
| "step": 2172 | |
| }, | |
| { | |
| "completion_length": 255.27083587646484, | |
| "epoch": 0.19315555555555555, | |
| "grad_norm": 1052.441650390625, | |
| "learning_rate": 3e-06, | |
| "loss": 17.4183, | |
| "reward": 1.75, | |
| "reward_std": 0.720521941781044, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.0, | |
| "step": 2173, | |
| "zero_std_ratio": 0.25 | |
| }, | |
| { | |
| "epoch": 0.19324444444444444, | |
| "grad_norm": 1221.40673828125, | |
| "learning_rate": 3e-06, | |
| "loss": 10.0883, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.19333333333333333, | |
| "grad_norm": 733.4461059570312, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5639, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.19342222222222222, | |
| "grad_norm": 981.289794921875, | |
| "learning_rate": 3e-06, | |
| "loss": 32.1803, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.1935111111111111, | |
| "grad_norm": 1169.4273681640625, | |
| "learning_rate": 3e-06, | |
| "loss": 38.9057, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 0.1936, | |
| "grad_norm": 1102.001220703125, | |
| "learning_rate": 3e-06, | |
| "loss": 11.3484, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.19368888888888888, | |
| "grad_norm": 992.0403442382812, | |
| "learning_rate": 3e-06, | |
| "loss": 17.4565, | |
| "step": 2179 | |
| }, | |
| { | |
| "epoch": 0.19377777777777777, | |
| "grad_norm": 1574.7171630859375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7767, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.19386666666666666, | |
| "grad_norm": 713.719482421875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3293, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 0.19395555555555555, | |
| "grad_norm": 999.0922241210938, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7797, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.19404444444444444, | |
| "grad_norm": 1164.4508056640625, | |
| "learning_rate": 3e-06, | |
| "loss": 22.3378, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 0.19413333333333332, | |
| "grad_norm": 1133.1566162109375, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4018, | |
| "step": 2184 | |
| }, | |
| { | |
| "completion_length": 246.64584350585938, | |
| "epoch": 0.1942222222222222, | |
| "grad_norm": 668.209716796875, | |
| "learning_rate": 3e-06, | |
| "loss": -9.4869, | |
| "reward": 1.7083333730697632, | |
| "reward_std": 0.3061862215399742, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.9583333432674408, | |
| "step": 2185, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.1943111111111111, | |
| "grad_norm": 433.6723937988281, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4995, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.1944, | |
| "grad_norm": 682.7325439453125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.6521, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 0.19448888888888888, | |
| "grad_norm": 636.8472900390625, | |
| "learning_rate": 3e-06, | |
| "loss": -13.4065, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.19457777777777777, | |
| "grad_norm": 447.03173828125, | |
| "learning_rate": 3e-06, | |
| "loss": -18.6822, | |
| "step": 2189 | |
| }, | |
| { | |
| "epoch": 0.19466666666666665, | |
| "grad_norm": 855.8515014648438, | |
| "learning_rate": 3e-06, | |
| "loss": -18.9689, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.19475555555555554, | |
| "grad_norm": 1044.5152587890625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.745, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 0.19484444444444443, | |
| "grad_norm": 422.6670837402344, | |
| "learning_rate": 3e-06, | |
| "loss": -16.4546, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.19493333333333332, | |
| "grad_norm": 698.4715576171875, | |
| "learning_rate": 3e-06, | |
| "loss": -37.7735, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 0.19502222222222224, | |
| "grad_norm": 689.1241455078125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.1425, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.19511111111111112, | |
| "grad_norm": 521.723876953125, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6598, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.1952, | |
| "grad_norm": 641.6820678710938, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7095, | |
| "step": 2196 | |
| }, | |
| { | |
| "completion_length": 250.18750762939453, | |
| "epoch": 0.1952888888888889, | |
| "grad_norm": 695.4179077148438, | |
| "learning_rate": 3e-06, | |
| "loss": -23.0851, | |
| "reward": 1.5625000596046448, | |
| "reward_std": 0.23116153478622437, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8750000149011612, | |
| "step": 2197, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.1953777777777778, | |
| "grad_norm": 606.8045654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.2567, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.19546666666666668, | |
| "grad_norm": 742.4933471679688, | |
| "learning_rate": 3e-06, | |
| "loss": 1.28, | |
| "step": 2199 | |
| }, | |
| { | |
| "epoch": 0.19555555555555557, | |
| "grad_norm": 555.1710205078125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6916, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.19564444444444445, | |
| "grad_norm": 658.5838012695312, | |
| "learning_rate": 3e-06, | |
| "loss": -3.5826, | |
| "step": 2201 | |
| }, | |
| { | |
| "epoch": 0.19573333333333334, | |
| "grad_norm": 481.04693603515625, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7868, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.19582222222222223, | |
| "grad_norm": 583.1636352539062, | |
| "learning_rate": 3e-06, | |
| "loss": -26.7921, | |
| "step": 2203 | |
| }, | |
| { | |
| "epoch": 0.19591111111111112, | |
| "grad_norm": 582.9187622070312, | |
| "learning_rate": 3e-06, | |
| "loss": -1.0781, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 660.7078247070312, | |
| "learning_rate": 3e-06, | |
| "loss": -2.465, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.1960888888888889, | |
| "grad_norm": 522.7738647460938, | |
| "learning_rate": 3e-06, | |
| "loss": -3.8803, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.19617777777777778, | |
| "grad_norm": 1109.6839599609375, | |
| "learning_rate": 3e-06, | |
| "loss": -13.524, | |
| "step": 2207 | |
| }, | |
| { | |
| "epoch": 0.19626666666666667, | |
| "grad_norm": 588.8282470703125, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7292, | |
| "step": 2208 | |
| }, | |
| { | |
| "completion_length": 244.9791717529297, | |
| "epoch": 0.19635555555555556, | |
| "grad_norm": 403.5253601074219, | |
| "learning_rate": 3e-06, | |
| "loss": -24.4641, | |
| "reward": 1.4895833730697632, | |
| "reward_std": 0.20219219475984573, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 2209, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.19644444444444445, | |
| "grad_norm": 545.7481079101562, | |
| "learning_rate": 3e-06, | |
| "loss": -28.5028, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.19653333333333334, | |
| "grad_norm": 737.4202270507812, | |
| "learning_rate": 3e-06, | |
| "loss": -21.5566, | |
| "step": 2211 | |
| }, | |
| { | |
| "epoch": 0.19662222222222223, | |
| "grad_norm": 415.2235412597656, | |
| "learning_rate": 3e-06, | |
| "loss": -19.7415, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.19671111111111111, | |
| "grad_norm": 661.0859375, | |
| "learning_rate": 3e-06, | |
| "loss": -27.5604, | |
| "step": 2213 | |
| }, | |
| { | |
| "epoch": 0.1968, | |
| "grad_norm": 688.0073852539062, | |
| "learning_rate": 3e-06, | |
| "loss": -25.3044, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.1968888888888889, | |
| "grad_norm": 377.343505859375, | |
| "learning_rate": 3e-06, | |
| "loss": -30.7918, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.19697777777777778, | |
| "grad_norm": 535.9216918945312, | |
| "learning_rate": 3e-06, | |
| "loss": -28.4559, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.19706666666666667, | |
| "grad_norm": 631.3789672851562, | |
| "learning_rate": 3e-06, | |
| "loss": -28.4084, | |
| "step": 2217 | |
| }, | |
| { | |
| "epoch": 0.19715555555555556, | |
| "grad_norm": 467.3281555175781, | |
| "learning_rate": 3e-06, | |
| "loss": -24.7054, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.19724444444444444, | |
| "grad_norm": 673.6080932617188, | |
| "learning_rate": 3e-06, | |
| "loss": -30.1266, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 0.19733333333333333, | |
| "grad_norm": 650.60009765625, | |
| "learning_rate": 3e-06, | |
| "loss": -30.6526, | |
| "step": 2220 | |
| }, | |
| { | |
| "completion_length": 251.27083587646484, | |
| "epoch": 0.19742222222222222, | |
| "grad_norm": 937.9761962890625, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5455, | |
| "reward": 1.9166667461395264, | |
| "reward_std": 0.4779854342341423, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.1666666567325592, | |
| "step": 2221, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.1975111111111111, | |
| "grad_norm": 618.4127807617188, | |
| "learning_rate": 3e-06, | |
| "loss": 8.2676, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.1976, | |
| "grad_norm": 681.9295654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7096, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 0.1976888888888889, | |
| "grad_norm": 1006.9591064453125, | |
| "learning_rate": 3e-06, | |
| "loss": 52.6603, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.19777777777777777, | |
| "grad_norm": 786.645263671875, | |
| "learning_rate": 3e-06, | |
| "loss": 7.371, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.19786666666666666, | |
| "grad_norm": 779.1517333984375, | |
| "learning_rate": 3e-06, | |
| "loss": 15.4347, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.19795555555555555, | |
| "grad_norm": 1029.3656005859375, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7798, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 0.19804444444444444, | |
| "grad_norm": 1035.653076171875, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4735, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.19813333333333333, | |
| "grad_norm": 829.162841796875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.8614, | |
| "step": 2229 | |
| }, | |
| { | |
| "epoch": 0.19822222222222222, | |
| "grad_norm": 873.4863891601562, | |
| "learning_rate": 3e-06, | |
| "loss": 30.9012, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.1983111111111111, | |
| "grad_norm": 649.9591674804688, | |
| "learning_rate": 3e-06, | |
| "loss": -6.5108, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 0.1984, | |
| "grad_norm": 805.36328125, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5386, | |
| "step": 2232 | |
| }, | |
| { | |
| "completion_length": 246.1041717529297, | |
| "epoch": 0.19848888888888888, | |
| "grad_norm": 895.270263671875, | |
| "learning_rate": 3e-06, | |
| "loss": -57.1749, | |
| "reward": 1.2500000596046448, | |
| "reward_std": 0.4779854342341423, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5000000111758709, | |
| "step": 2233, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.19857777777777777, | |
| "grad_norm": 765.6522827148438, | |
| "learning_rate": 3e-06, | |
| "loss": -48.4423, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.19866666666666666, | |
| "grad_norm": 755.2459716796875, | |
| "learning_rate": 3e-06, | |
| "loss": -29.8722, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.19875555555555555, | |
| "grad_norm": 685.5021362304688, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4638, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.19884444444444443, | |
| "grad_norm": 832.52880859375, | |
| "learning_rate": 3e-06, | |
| "loss": -41.564, | |
| "step": 2237 | |
| }, | |
| { | |
| "epoch": 0.19893333333333332, | |
| "grad_norm": 681.293701171875, | |
| "learning_rate": 3e-06, | |
| "loss": -56.8457, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.1990222222222222, | |
| "grad_norm": 733.9737548828125, | |
| "learning_rate": 3e-06, | |
| "loss": -71.0826, | |
| "step": 2239 | |
| }, | |
| { | |
| "epoch": 0.1991111111111111, | |
| "grad_norm": 759.0897216796875, | |
| "learning_rate": 3e-06, | |
| "loss": -59.8898, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.1992, | |
| "grad_norm": 752.1569213867188, | |
| "learning_rate": 3e-06, | |
| "loss": -35.6279, | |
| "step": 2241 | |
| }, | |
| { | |
| "epoch": 0.19928888888888888, | |
| "grad_norm": 793.9288330078125, | |
| "learning_rate": 3e-06, | |
| "loss": -31.1288, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.19937777777777776, | |
| "grad_norm": 875.0328979492188, | |
| "learning_rate": 3e-06, | |
| "loss": -54.7059, | |
| "step": 2243 | |
| }, | |
| { | |
| "epoch": 0.19946666666666665, | |
| "grad_norm": 735.8705444335938, | |
| "learning_rate": 3e-06, | |
| "loss": -64.119, | |
| "step": 2244 | |
| }, | |
| { | |
| "completion_length": 245.1875, | |
| "epoch": 0.19955555555555557, | |
| "grad_norm": 691.626953125, | |
| "learning_rate": 3e-06, | |
| "loss": 33.6045, | |
| "reward": 1.0729166865348816, | |
| "reward_std": 0.41281384229660034, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.3333333432674408, | |
| "step": 2245, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.19964444444444446, | |
| "grad_norm": 847.1795654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 36.038, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.19973333333333335, | |
| "grad_norm": 502.91473388671875, | |
| "learning_rate": 3e-06, | |
| "loss": 22.1079, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 0.19982222222222223, | |
| "grad_norm": 577.439697265625, | |
| "learning_rate": 3e-06, | |
| "loss": 33.3352, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.19991111111111112, | |
| "grad_norm": 830.4883422851562, | |
| "learning_rate": 3e-06, | |
| "loss": 41.7401, | |
| "step": 2249 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 769.1807861328125, | |
| "learning_rate": 3e-06, | |
| "loss": 37.7994, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2000888888888889, | |
| "grad_norm": 636.1270141601562, | |
| "learning_rate": 3e-06, | |
| "loss": 27.5784, | |
| "step": 2251 | |
| }, | |
| { | |
| "epoch": 0.2001777777777778, | |
| "grad_norm": 761.4356079101562, | |
| "learning_rate": 3e-06, | |
| "loss": 27.7986, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.20026666666666668, | |
| "grad_norm": 530.1929931640625, | |
| "learning_rate": 3e-06, | |
| "loss": 17.7146, | |
| "step": 2253 | |
| }, | |
| { | |
| "epoch": 0.20035555555555556, | |
| "grad_norm": 564.9805908203125, | |
| "learning_rate": 3e-06, | |
| "loss": 27.1709, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.20044444444444445, | |
| "grad_norm": 600.7700805664062, | |
| "learning_rate": 3e-06, | |
| "loss": 37.6539, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.20053333333333334, | |
| "grad_norm": 710.3458862304688, | |
| "learning_rate": 3e-06, | |
| "loss": 30.2361, | |
| "step": 2256 | |
| }, | |
| { | |
| "completion_length": 249.9791717529297, | |
| "epoch": 0.20062222222222223, | |
| "grad_norm": 404.9451904296875, | |
| "learning_rate": 3e-06, | |
| "loss": -93.8459, | |
| "reward": 1.1458333730697632, | |
| "reward_std": 0.23116151988506317, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.4583333283662796, | |
| "step": 2257, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.20071111111111112, | |
| "grad_norm": 394.6553649902344, | |
| "learning_rate": 3e-06, | |
| "loss": -76.6418, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.2008, | |
| "grad_norm": 366.0687255859375, | |
| "learning_rate": 3e-06, | |
| "loss": -73.8687, | |
| "step": 2259 | |
| }, | |
| { | |
| "epoch": 0.2008888888888889, | |
| "grad_norm": 464.67388916015625, | |
| "learning_rate": 3e-06, | |
| "loss": -82.5149, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.20097777777777778, | |
| "grad_norm": 437.32470703125, | |
| "learning_rate": 3e-06, | |
| "loss": -62.3202, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 0.20106666666666667, | |
| "grad_norm": 884.9143676757812, | |
| "learning_rate": 3e-06, | |
| "loss": -76.1318, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.20115555555555556, | |
| "grad_norm": 431.22882080078125, | |
| "learning_rate": 3e-06, | |
| "loss": -101.0349, | |
| "step": 2263 | |
| }, | |
| { | |
| "epoch": 0.20124444444444445, | |
| "grad_norm": 407.47344970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -83.3404, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.20133333333333334, | |
| "grad_norm": 450.6275634765625, | |
| "learning_rate": 3e-06, | |
| "loss": -81.2565, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.20142222222222222, | |
| "grad_norm": 446.26715087890625, | |
| "learning_rate": 3e-06, | |
| "loss": -92.9891, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.2015111111111111, | |
| "grad_norm": 421.58514404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -72.7071, | |
| "step": 2267 | |
| }, | |
| { | |
| "epoch": 0.2016, | |
| "grad_norm": 499.9620361328125, | |
| "learning_rate": 3e-06, | |
| "loss": -86.6616, | |
| "step": 2268 | |
| }, | |
| { | |
| "completion_length": 251.43750762939453, | |
| "epoch": 0.2016888888888889, | |
| "grad_norm": 1807.4974365234375, | |
| "learning_rate": 3e-06, | |
| "loss": -2.4199, | |
| "reward": 1.4375, | |
| "reward_std": 0.46232306957244873, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 2269, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.20177777777777778, | |
| "grad_norm": 1014.031005859375, | |
| "learning_rate": 3e-06, | |
| "loss": -11.0913, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.20186666666666667, | |
| "grad_norm": 969.154541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -45.8343, | |
| "step": 2271 | |
| }, | |
| { | |
| "epoch": 0.20195555555555555, | |
| "grad_norm": 1514.218994140625, | |
| "learning_rate": 3e-06, | |
| "loss": -6.767, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.20204444444444444, | |
| "grad_norm": 1047.0552978515625, | |
| "learning_rate": 3e-06, | |
| "loss": -41.9022, | |
| "step": 2273 | |
| }, | |
| { | |
| "epoch": 0.20213333333333333, | |
| "grad_norm": 1069.608642578125, | |
| "learning_rate": 3e-06, | |
| "loss": -32.74, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.20222222222222222, | |
| "grad_norm": 1186.3797607421875, | |
| "learning_rate": 3e-06, | |
| "loss": -5.627, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.2023111111111111, | |
| "grad_norm": 1353.1217041015625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.7514, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.2024, | |
| "grad_norm": 973.7822875976562, | |
| "learning_rate": 3e-06, | |
| "loss": -57.7389, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 0.20248888888888888, | |
| "grad_norm": 1122.6533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -19.221, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.20257777777777777, | |
| "grad_norm": 1007.2978515625, | |
| "learning_rate": 3e-06, | |
| "loss": -49.3046, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 1087.270751953125, | |
| "learning_rate": 3e-06, | |
| "loss": -46.9498, | |
| "step": 2280 | |
| }, | |
| { | |
| "completion_length": 248.0, | |
| "epoch": 0.20275555555555555, | |
| "grad_norm": 684.866455078125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.7332, | |
| "reward": 1.0416666865348816, | |
| "reward_std": 0.40296074748039246, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 0.4166666716337204, | |
| "step": 2281, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.20284444444444444, | |
| "grad_norm": 685.3329467773438, | |
| "learning_rate": 3e-06, | |
| "loss": 10.4014, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.20293333333333333, | |
| "grad_norm": 828.5563354492188, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5656, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 0.20302222222222222, | |
| "grad_norm": 797.2943115234375, | |
| "learning_rate": 3e-06, | |
| "loss": -24.5962, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.2031111111111111, | |
| "grad_norm": 778.4367065429688, | |
| "learning_rate": 3e-06, | |
| "loss": 13.921, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.2032, | |
| "grad_norm": 694.8433837890625, | |
| "learning_rate": 3e-06, | |
| "loss": -1.7194, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.20328888888888888, | |
| "grad_norm": 808.0003662109375, | |
| "learning_rate": 3e-06, | |
| "loss": -30.4908, | |
| "step": 2287 | |
| }, | |
| { | |
| "epoch": 0.20337777777777777, | |
| "grad_norm": 724.6696166992188, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6068, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.20346666666666666, | |
| "grad_norm": 830.2708129882812, | |
| "learning_rate": 3e-06, | |
| "loss": -28.4917, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 0.20355555555555555, | |
| "grad_norm": 785.9896850585938, | |
| "learning_rate": 3e-06, | |
| "loss": -31.1686, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.20364444444444443, | |
| "grad_norm": 851.0347290039062, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2481, | |
| "step": 2291 | |
| }, | |
| { | |
| "epoch": 0.20373333333333332, | |
| "grad_norm": 774.1303100585938, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2776, | |
| "step": 2292 | |
| }, | |
| { | |
| "completion_length": 246.0625, | |
| "epoch": 0.2038222222222222, | |
| "grad_norm": 635.4057006835938, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3122, | |
| "reward": 1.5104167461395264, | |
| "reward_std": 0.34120412170886993, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 0.8333333283662796, | |
| "step": 2293, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.2039111111111111, | |
| "grad_norm": 653.86328125, | |
| "learning_rate": 3e-06, | |
| "loss": 12.7972, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 610.3839721679688, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9267, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.20408888888888888, | |
| "grad_norm": 714.1361083984375, | |
| "learning_rate": 3e-06, | |
| "loss": -23.5832, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.2041777777777778, | |
| "grad_norm": 718.49658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -4.627, | |
| "step": 2297 | |
| }, | |
| { | |
| "epoch": 0.20426666666666668, | |
| "grad_norm": 696.8006591796875, | |
| "learning_rate": 3e-06, | |
| "loss": -11.8357, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.20435555555555557, | |
| "grad_norm": 814.3312377929688, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5097, | |
| "step": 2299 | |
| }, | |
| { | |
| "epoch": 0.20444444444444446, | |
| "grad_norm": 556.5509643554688, | |
| "learning_rate": 3e-06, | |
| "loss": 10.6618, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.20453333333333334, | |
| "grad_norm": 667.0651245117188, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0333, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 0.20462222222222223, | |
| "grad_norm": 694.1640625, | |
| "learning_rate": 3e-06, | |
| "loss": -24.5284, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.20471111111111112, | |
| "grad_norm": 923.972900390625, | |
| "learning_rate": 3e-06, | |
| "loss": -11.2173, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 0.2048, | |
| "grad_norm": 659.1800537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -16.7782, | |
| "step": 2304 | |
| }, | |
| { | |
| "completion_length": 254.4375, | |
| "epoch": 0.2048888888888889, | |
| "grad_norm": 971.201171875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.4646, | |
| "reward": 2.125, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.375, | |
| "step": 2305, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.2049777777777778, | |
| "grad_norm": 1799.6265869140625, | |
| "learning_rate": 3e-06, | |
| "loss": 13.6479, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.20506666666666667, | |
| "grad_norm": 1432.3759765625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8131, | |
| "step": 2307 | |
| }, | |
| { | |
| "epoch": 0.20515555555555556, | |
| "grad_norm": 1100.3665771484375, | |
| "learning_rate": 3e-06, | |
| "loss": 19.3096, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.20524444444444445, | |
| "grad_norm": 954.0089111328125, | |
| "learning_rate": 3e-06, | |
| "loss": -5.624, | |
| "step": 2309 | |
| }, | |
| { | |
| "epoch": 0.20533333333333334, | |
| "grad_norm": 1022.4109497070312, | |
| "learning_rate": 3e-06, | |
| "loss": -26.2045, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.20542222222222223, | |
| "grad_norm": 1009.416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7483, | |
| "step": 2311 | |
| }, | |
| { | |
| "epoch": 0.20551111111111112, | |
| "grad_norm": 1540.92333984375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.92, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.2056, | |
| "grad_norm": 1027.509765625, | |
| "learning_rate": 3e-06, | |
| "loss": -24.657, | |
| "step": 2313 | |
| }, | |
| { | |
| "epoch": 0.2056888888888889, | |
| "grad_norm": 948.5579833984375, | |
| "learning_rate": 3e-06, | |
| "loss": 8.6437, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.20577777777777778, | |
| "grad_norm": 919.4548950195312, | |
| "learning_rate": 3e-06, | |
| "loss": -21.9334, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.20586666666666667, | |
| "grad_norm": 979.2217407226562, | |
| "learning_rate": 3e-06, | |
| "loss": -36.1305, | |
| "step": 2316 | |
| }, | |
| { | |
| "completion_length": 253.52083587646484, | |
| "epoch": 0.20595555555555556, | |
| "grad_norm": 382.2681579589844, | |
| "learning_rate": 3e-06, | |
| "loss": 13.32, | |
| "reward": 0.9791666865348816, | |
| "reward_std": 0.10206206887960434, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.2916666679084301, | |
| "step": 2317, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.20604444444444445, | |
| "grad_norm": 585.9896850585938, | |
| "learning_rate": 3e-06, | |
| "loss": 14.567, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.20613333333333334, | |
| "grad_norm": 433.8167419433594, | |
| "learning_rate": 3e-06, | |
| "loss": 0.179, | |
| "step": 2319 | |
| }, | |
| { | |
| "epoch": 0.20622222222222222, | |
| "grad_norm": 473.40289306640625, | |
| "learning_rate": 3e-06, | |
| "loss": -8.1342, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.2063111111111111, | |
| "grad_norm": 454.9488220214844, | |
| "learning_rate": 3e-06, | |
| "loss": 6.4596, | |
| "step": 2321 | |
| }, | |
| { | |
| "epoch": 0.2064, | |
| "grad_norm": 545.6451416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0272, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.2064888888888889, | |
| "grad_norm": 393.2840576171875, | |
| "learning_rate": 3e-06, | |
| "loss": 10.1428, | |
| "step": 2323 | |
| }, | |
| { | |
| "epoch": 0.20657777777777778, | |
| "grad_norm": 624.9196166992188, | |
| "learning_rate": 3e-06, | |
| "loss": 10.519, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.20666666666666667, | |
| "grad_norm": 423.4202880859375, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3045, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.20675555555555555, | |
| "grad_norm": 396.0754699707031, | |
| "learning_rate": 3e-06, | |
| "loss": -11.096, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.20684444444444444, | |
| "grad_norm": 583.927734375, | |
| "learning_rate": 3e-06, | |
| "loss": 2.5989, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 0.20693333333333333, | |
| "grad_norm": 597.1937255859375, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4196, | |
| "step": 2328 | |
| }, | |
| { | |
| "completion_length": 242.62500762939453, | |
| "epoch": 0.20702222222222222, | |
| "grad_norm": 800.9533081054688, | |
| "learning_rate": 3e-06, | |
| "loss": -33.827, | |
| "reward": 1.4375000596046448, | |
| "reward_std": 0.46232303977012634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 2329, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.2071111111111111, | |
| "grad_norm": 696.8334350585938, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2565, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.2072, | |
| "grad_norm": 728.7371826171875, | |
| "learning_rate": 3e-06, | |
| "loss": -55.6491, | |
| "step": 2331 | |
| }, | |
| { | |
| "epoch": 0.20728888888888888, | |
| "grad_norm": 845.1964111328125, | |
| "learning_rate": 3e-06, | |
| "loss": -34.6344, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.20737777777777777, | |
| "grad_norm": 797.7058715820312, | |
| "learning_rate": 3e-06, | |
| "loss": -38.3381, | |
| "step": 2333 | |
| }, | |
| { | |
| "epoch": 0.20746666666666666, | |
| "grad_norm": 815.6392211914062, | |
| "learning_rate": 3e-06, | |
| "loss": -39.5545, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.20755555555555555, | |
| "grad_norm": 824.5341796875, | |
| "learning_rate": 3e-06, | |
| "loss": -42.9804, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.20764444444444444, | |
| "grad_norm": 839.4075927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -31.0013, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.20773333333333333, | |
| "grad_norm": 808.272705078125, | |
| "learning_rate": 3e-06, | |
| "loss": -63.1393, | |
| "step": 2337 | |
| }, | |
| { | |
| "epoch": 0.2078222222222222, | |
| "grad_norm": 937.5029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -49.1489, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.2079111111111111, | |
| "grad_norm": 852.005859375, | |
| "learning_rate": 3e-06, | |
| "loss": -50.4064, | |
| "step": 2339 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 897.2970581054688, | |
| "learning_rate": 3e-06, | |
| "loss": -52.8892, | |
| "step": 2340 | |
| }, | |
| { | |
| "completion_length": 222.70834350585938, | |
| "epoch": 0.20808888888888888, | |
| "grad_norm": 757.0648803710938, | |
| "learning_rate": 3e-06, | |
| "loss": 35.128, | |
| "reward": 1.7083333730697632, | |
| "reward_std": 0.3332235887646675, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.625, | |
| "rewards/correctness_reward_func_math": 1.0833333134651184, | |
| "step": 2341, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.20817777777777777, | |
| "grad_norm": 974.6918334960938, | |
| "learning_rate": 3e-06, | |
| "loss": 26.4346, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.20826666666666666, | |
| "grad_norm": 960.5851440429688, | |
| "learning_rate": 3e-06, | |
| "loss": 18.2117, | |
| "step": 2343 | |
| }, | |
| { | |
| "epoch": 0.20835555555555554, | |
| "grad_norm": 748.3045654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 19.8586, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.20844444444444443, | |
| "grad_norm": 900.9320068359375, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7996, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.20853333333333332, | |
| "grad_norm": 769.8694458007812, | |
| "learning_rate": 3e-06, | |
| "loss": 25.5797, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.2086222222222222, | |
| "grad_norm": 734.0509033203125, | |
| "learning_rate": 3e-06, | |
| "loss": 27.9682, | |
| "step": 2347 | |
| }, | |
| { | |
| "epoch": 0.2087111111111111, | |
| "grad_norm": 869.8523559570312, | |
| "learning_rate": 3e-06, | |
| "loss": 21.5981, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.2088, | |
| "grad_norm": 1000.9803466796875, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3671, | |
| "step": 2349 | |
| }, | |
| { | |
| "epoch": 0.2088888888888889, | |
| "grad_norm": 766.8132934570312, | |
| "learning_rate": 3e-06, | |
| "loss": 10.1062, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.2089777777777778, | |
| "grad_norm": 1140.35986328125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.0209, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 0.20906666666666668, | |
| "grad_norm": 769.6608276367188, | |
| "learning_rate": 3e-06, | |
| "loss": 17.1774, | |
| "step": 2352 | |
| }, | |
| { | |
| "completion_length": 235.12500762939453, | |
| "epoch": 0.20915555555555557, | |
| "grad_norm": 1358.110595703125, | |
| "learning_rate": 3e-06, | |
| "loss": -29.3384, | |
| "reward": 1.5625000596046448, | |
| "reward_std": 0.599253699183464, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.8750000298023224, | |
| "step": 2353, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.20924444444444446, | |
| "grad_norm": 1078.0203857421875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.6919, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.20933333333333334, | |
| "grad_norm": 1239.9210205078125, | |
| "learning_rate": 3e-06, | |
| "loss": -58.9311, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.20942222222222223, | |
| "grad_norm": 1190.312255859375, | |
| "learning_rate": 3e-06, | |
| "loss": -41.3338, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.20951111111111112, | |
| "grad_norm": 1177.6978759765625, | |
| "learning_rate": 3e-06, | |
| "loss": -77.211, | |
| "step": 2357 | |
| }, | |
| { | |
| "epoch": 0.2096, | |
| "grad_norm": 1262.98876953125, | |
| "learning_rate": 3e-06, | |
| "loss": -30.6502, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.2096888888888889, | |
| "grad_norm": 1281.507568359375, | |
| "learning_rate": 3e-06, | |
| "loss": -46.3909, | |
| "step": 2359 | |
| }, | |
| { | |
| "epoch": 0.20977777777777779, | |
| "grad_norm": 1242.6148681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.0434, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.20986666666666667, | |
| "grad_norm": 1216.9324951171875, | |
| "learning_rate": 3e-06, | |
| "loss": -75.5012, | |
| "step": 2361 | |
| }, | |
| { | |
| "epoch": 0.20995555555555556, | |
| "grad_norm": 1148.396240234375, | |
| "learning_rate": 3e-06, | |
| "loss": -60.3074, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.21004444444444445, | |
| "grad_norm": 1038.05224609375, | |
| "learning_rate": 3e-06, | |
| "loss": -90.1416, | |
| "step": 2363 | |
| }, | |
| { | |
| "epoch": 0.21013333333333334, | |
| "grad_norm": 1481.82470703125, | |
| "learning_rate": 3e-06, | |
| "loss": -41.4112, | |
| "step": 2364 | |
| }, | |
| { | |
| "completion_length": 236.4375, | |
| "epoch": 0.21022222222222223, | |
| "grad_norm": 1174.8533935546875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.5789, | |
| "reward": 1.2500000298023224, | |
| "reward_std": 0.4431168735027313, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.4999999925494194, | |
| "step": 2365, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.21031111111111112, | |
| "grad_norm": 3015.200927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -27.8838, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.2104, | |
| "grad_norm": 1439.3358154296875, | |
| "learning_rate": 3e-06, | |
| "loss": -38.5885, | |
| "step": 2367 | |
| }, | |
| { | |
| "epoch": 0.2104888888888889, | |
| "grad_norm": 869.1331787109375, | |
| "learning_rate": 3e-06, | |
| "loss": -19.1413, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.21057777777777778, | |
| "grad_norm": 954.2206420898438, | |
| "learning_rate": 3e-06, | |
| "loss": -22.8123, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 0.21066666666666667, | |
| "grad_norm": 923.0101928710938, | |
| "learning_rate": 3e-06, | |
| "loss": -34.3305, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.21075555555555556, | |
| "grad_norm": 1316.6534423828125, | |
| "learning_rate": 3e-06, | |
| "loss": -30.9986, | |
| "step": 2371 | |
| }, | |
| { | |
| "epoch": 0.21084444444444445, | |
| "grad_norm": 1236.0667724609375, | |
| "learning_rate": 3e-06, | |
| "loss": -48.6076, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.21093333333333333, | |
| "grad_norm": 1252.537109375, | |
| "learning_rate": 3e-06, | |
| "loss": -61.8291, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 0.21102222222222222, | |
| "grad_norm": 1343.861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -29.769, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.2111111111111111, | |
| "grad_norm": 1069.728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -31.3108, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.2112, | |
| "grad_norm": 898.3480834960938, | |
| "learning_rate": 3e-06, | |
| "loss": -53.5648, | |
| "step": 2376 | |
| }, | |
| { | |
| "completion_length": 250.2916717529297, | |
| "epoch": 0.2112888888888889, | |
| "grad_norm": 1555.4439697265625, | |
| "learning_rate": 3e-06, | |
| "loss": 23.924, | |
| "reward": 0.9583333432674408, | |
| "reward_std": 0.43528565764427185, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.2083333358168602, | |
| "step": 2377, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.21137777777777778, | |
| "grad_norm": 1271.503662109375, | |
| "learning_rate": 3e-06, | |
| "loss": -28.6245, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.21146666666666666, | |
| "grad_norm": 1083.2822265625, | |
| "learning_rate": 3e-06, | |
| "loss": -61.1598, | |
| "step": 2379 | |
| }, | |
| { | |
| "epoch": 0.21155555555555555, | |
| "grad_norm": 950.6062622070312, | |
| "learning_rate": 3e-06, | |
| "loss": -19.0397, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.21164444444444444, | |
| "grad_norm": 1111.03857421875, | |
| "learning_rate": 3e-06, | |
| "loss": 11.29, | |
| "step": 2381 | |
| }, | |
| { | |
| "epoch": 0.21173333333333333, | |
| "grad_norm": 1028.947509765625, | |
| "learning_rate": 3e-06, | |
| "loss": -16.2803, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.21182222222222222, | |
| "grad_norm": 1200.6107177734375, | |
| "learning_rate": 3e-06, | |
| "loss": 14.5257, | |
| "step": 2383 | |
| }, | |
| { | |
| "epoch": 0.2119111111111111, | |
| "grad_norm": 1430.796630859375, | |
| "learning_rate": 3e-06, | |
| "loss": -34.2578, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 1028.548583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -68.5076, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.21208888888888888, | |
| "grad_norm": 1001.2411499023438, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6736, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.21217777777777777, | |
| "grad_norm": 984.0308227539062, | |
| "learning_rate": 3e-06, | |
| "loss": -2.7987, | |
| "step": 2387 | |
| }, | |
| { | |
| "epoch": 0.21226666666666666, | |
| "grad_norm": 965.3121337890625, | |
| "learning_rate": 3e-06, | |
| "loss": -28.9898, | |
| "step": 2388 | |
| }, | |
| { | |
| "completion_length": 229.2916717529297, | |
| "epoch": 0.21235555555555555, | |
| "grad_norm": 557.446533203125, | |
| "learning_rate": 3e-06, | |
| "loss": 21.0555, | |
| "reward": 1.8437500596046448, | |
| "reward_std": 0.1546149756759405, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6770833432674408, | |
| "rewards/correctness_reward_func_math": 1.1666666716337204, | |
| "step": 2389, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.21244444444444444, | |
| "grad_norm": 575.1875610351562, | |
| "learning_rate": 3e-06, | |
| "loss": 23.3964, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.21253333333333332, | |
| "grad_norm": 360.0423278808594, | |
| "learning_rate": 3e-06, | |
| "loss": 33.3316, | |
| "step": 2391 | |
| }, | |
| { | |
| "epoch": 0.2126222222222222, | |
| "grad_norm": 255.54953002929688, | |
| "learning_rate": 3e-06, | |
| "loss": 31.5129, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.2127111111111111, | |
| "grad_norm": 265.9867248535156, | |
| "learning_rate": 3e-06, | |
| "loss": 16.7137, | |
| "step": 2393 | |
| }, | |
| { | |
| "epoch": 0.2128, | |
| "grad_norm": 356.7539978027344, | |
| "learning_rate": 3e-06, | |
| "loss": 29.8486, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.21288888888888888, | |
| "grad_norm": 380.1522521972656, | |
| "learning_rate": 3e-06, | |
| "loss": 18.9171, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.21297777777777777, | |
| "grad_norm": 627.226806640625, | |
| "learning_rate": 3e-06, | |
| "loss": 14.277, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.21306666666666665, | |
| "grad_norm": 473.0029296875, | |
| "learning_rate": 3e-06, | |
| "loss": 26.1599, | |
| "step": 2397 | |
| }, | |
| { | |
| "epoch": 0.21315555555555554, | |
| "grad_norm": 256.2850646972656, | |
| "learning_rate": 3e-06, | |
| "loss": 27.8036, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.21324444444444443, | |
| "grad_norm": 294.08056640625, | |
| "learning_rate": 3e-06, | |
| "loss": 11.133, | |
| "step": 2399 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 292.5602111816406, | |
| "learning_rate": 3e-06, | |
| "loss": 21.8162, | |
| "step": 2400 | |
| }, | |
| { | |
| "completion_length": 248.7291717529297, | |
| "epoch": 0.21342222222222224, | |
| "grad_norm": 991.9778442382812, | |
| "learning_rate": 3e-06, | |
| "loss": 12.964, | |
| "reward": 1.5000000596046448, | |
| "reward_std": 0.20412413775920868, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.75, | |
| "step": 2401, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.21351111111111112, | |
| "grad_norm": 644.521484375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.3122, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.2136, | |
| "grad_norm": 680.9287719726562, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5692, | |
| "step": 2403 | |
| }, | |
| { | |
| "epoch": 0.2136888888888889, | |
| "grad_norm": 824.704345703125, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7909, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.2137777777777778, | |
| "grad_norm": 956.5078125, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9202, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.21386666666666668, | |
| "grad_norm": 1738.125244140625, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5927, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.21395555555555557, | |
| "grad_norm": 920.435546875, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9809, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 0.21404444444444445, | |
| "grad_norm": 677.7364501953125, | |
| "learning_rate": 3e-06, | |
| "loss": -7.6728, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.21413333333333334, | |
| "grad_norm": 724.0176391601562, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1334, | |
| "step": 2409 | |
| }, | |
| { | |
| "epoch": 0.21422222222222223, | |
| "grad_norm": 885.7861938476562, | |
| "learning_rate": 3e-06, | |
| "loss": 17.665, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.21431111111111112, | |
| "grad_norm": 884.716552734375, | |
| "learning_rate": 3e-06, | |
| "loss": 3.5877, | |
| "step": 2411 | |
| }, | |
| { | |
| "epoch": 0.2144, | |
| "grad_norm": 1398.6461181640625, | |
| "learning_rate": 3e-06, | |
| "loss": 3.2704, | |
| "step": 2412 | |
| }, | |
| { | |
| "completion_length": 234.20834350585938, | |
| "epoch": 0.2144888888888889, | |
| "grad_norm": 1070.3880615234375, | |
| "learning_rate": 3e-06, | |
| "loss": -4.4756, | |
| "reward": 1.3750000596046448, | |
| "reward_std": 0.23116153478622437, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.6249999813735485, | |
| "step": 2413, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.21457777777777778, | |
| "grad_norm": 1026.6502685546875, | |
| "learning_rate": 3e-06, | |
| "loss": 44.4837, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.21466666666666667, | |
| "grad_norm": 772.33837890625, | |
| "learning_rate": 3e-06, | |
| "loss": 37.0703, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.21475555555555556, | |
| "grad_norm": 1085.96875, | |
| "learning_rate": 3e-06, | |
| "loss": 30.7537, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.21484444444444445, | |
| "grad_norm": 827.8580932617188, | |
| "learning_rate": 3e-06, | |
| "loss": 48.8112, | |
| "step": 2417 | |
| }, | |
| { | |
| "epoch": 0.21493333333333334, | |
| "grad_norm": 781.8536376953125, | |
| "learning_rate": 3e-06, | |
| "loss": 35.331, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.21502222222222223, | |
| "grad_norm": 830.3689575195312, | |
| "learning_rate": 3e-06, | |
| "loss": -6.2381, | |
| "step": 2419 | |
| }, | |
| { | |
| "epoch": 0.21511111111111111, | |
| "grad_norm": 1895.36572265625, | |
| "learning_rate": 3e-06, | |
| "loss": 38.7443, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.2152, | |
| "grad_norm": 1093.376220703125, | |
| "learning_rate": 3e-06, | |
| "loss": 32.4039, | |
| "step": 2421 | |
| }, | |
| { | |
| "epoch": 0.2152888888888889, | |
| "grad_norm": 1121.695556640625, | |
| "learning_rate": 3e-06, | |
| "loss": 21.949, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.21537777777777778, | |
| "grad_norm": 908.1818237304688, | |
| "learning_rate": 3e-06, | |
| "loss": 39.3356, | |
| "step": 2423 | |
| }, | |
| { | |
| "epoch": 0.21546666666666667, | |
| "grad_norm": 780.5909423828125, | |
| "learning_rate": 3e-06, | |
| "loss": 28.4789, | |
| "step": 2424 | |
| }, | |
| { | |
| "completion_length": 240.37500762939453, | |
| "epoch": 0.21555555555555556, | |
| "grad_norm": 1291.5950927734375, | |
| "learning_rate": 3e-06, | |
| "loss": -48.4281, | |
| "reward": 1.625, | |
| "reward_std": 0.564385175704956, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.875, | |
| "step": 2425, | |
| "zero_std_ratio": 0.375 | |
| }, | |
| { | |
| "epoch": 0.21564444444444444, | |
| "grad_norm": 1240.4404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -54.5814, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.21573333333333333, | |
| "grad_norm": 1328.792724609375, | |
| "learning_rate": 3e-06, | |
| "loss": -77.3574, | |
| "step": 2427 | |
| }, | |
| { | |
| "epoch": 0.21582222222222222, | |
| "grad_norm": 1365.1212158203125, | |
| "learning_rate": 3e-06, | |
| "loss": -56.5684, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.2159111111111111, | |
| "grad_norm": 1141.714111328125, | |
| "learning_rate": 3e-06, | |
| "loss": -52.8116, | |
| "step": 2429 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 1254.617919921875, | |
| "learning_rate": 3e-06, | |
| "loss": -45.3142, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.21608888888888889, | |
| "grad_norm": 1215.14404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -61.5184, | |
| "step": 2431 | |
| }, | |
| { | |
| "epoch": 0.21617777777777777, | |
| "grad_norm": 2824.7529296875, | |
| "learning_rate": 3e-06, | |
| "loss": -67.1598, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.21626666666666666, | |
| "grad_norm": 1158.3133544921875, | |
| "learning_rate": 3e-06, | |
| "loss": -84.968, | |
| "step": 2433 | |
| }, | |
| { | |
| "epoch": 0.21635555555555555, | |
| "grad_norm": 948.725341796875, | |
| "learning_rate": 3e-06, | |
| "loss": -67.6487, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.21644444444444444, | |
| "grad_norm": 1137.3834228515625, | |
| "learning_rate": 3e-06, | |
| "loss": -60.4848, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.21653333333333333, | |
| "grad_norm": 1104.3291015625, | |
| "learning_rate": 3e-06, | |
| "loss": -61.2857, | |
| "step": 2436 | |
| }, | |
| { | |
| "completion_length": 240.14583587646484, | |
| "epoch": 0.21662222222222222, | |
| "grad_norm": 668.9032592773438, | |
| "learning_rate": 3e-06, | |
| "loss": 11.3774, | |
| "reward": 1.2291667461395264, | |
| "reward_std": 0.23116152733564377, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5416666716337204, | |
| "step": 2437, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.2167111111111111, | |
| "grad_norm": 636.1585693359375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5818, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.2168, | |
| "grad_norm": 497.7554626464844, | |
| "learning_rate": 3e-06, | |
| "loss": 6.8708, | |
| "step": 2439 | |
| }, | |
| { | |
| "epoch": 0.21688888888888888, | |
| "grad_norm": 553.154052734375, | |
| "learning_rate": 3e-06, | |
| "loss": 24.3757, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.21697777777777777, | |
| "grad_norm": 1472.5419921875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4629, | |
| "step": 2441 | |
| }, | |
| { | |
| "epoch": 0.21706666666666666, | |
| "grad_norm": 652.6502685546875, | |
| "learning_rate": 3e-06, | |
| "loss": 18.709, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.21715555555555555, | |
| "grad_norm": 599.662353515625, | |
| "learning_rate": 3e-06, | |
| "loss": 6.6085, | |
| "step": 2443 | |
| }, | |
| { | |
| "epoch": 0.21724444444444443, | |
| "grad_norm": 596.9681396484375, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8602, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.21733333333333332, | |
| "grad_norm": 487.63580322265625, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5804, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.2174222222222222, | |
| "grad_norm": 1273.145751953125, | |
| "learning_rate": 3e-06, | |
| "loss": 16.3577, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.2175111111111111, | |
| "grad_norm": 771.951904296875, | |
| "learning_rate": 3e-06, | |
| "loss": -6.3023, | |
| "step": 2447 | |
| }, | |
| { | |
| "epoch": 0.2176, | |
| "grad_norm": 712.5418701171875, | |
| "learning_rate": 3e-06, | |
| "loss": 16.2355, | |
| "step": 2448 | |
| }, | |
| { | |
| "completion_length": 248.37500762939453, | |
| "epoch": 0.21768888888888888, | |
| "grad_norm": 790.022216796875, | |
| "learning_rate": 3e-06, | |
| "loss": 7.116, | |
| "reward": 2.2291667461395264, | |
| "reward_std": 0.26603007316589355, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 1.5416666865348816, | |
| "step": 2449, | |
| "zero_std_ratio": 0.75 | |
| }, | |
| { | |
| "epoch": 0.21777777777777776, | |
| "grad_norm": 546.7139892578125, | |
| "learning_rate": 3e-06, | |
| "loss": -10.9262, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.21786666666666665, | |
| "grad_norm": 517.8162231445312, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3609, | |
| "step": 2451 | |
| }, | |
| { | |
| "epoch": 0.21795555555555557, | |
| "grad_norm": 682.6878051757812, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4365, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.21804444444444446, | |
| "grad_norm": 607.7827758789062, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3393, | |
| "step": 2453 | |
| }, | |
| { | |
| "epoch": 0.21813333333333335, | |
| "grad_norm": 518.8916625976562, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2425, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.21822222222222223, | |
| "grad_norm": 629.7891235351562, | |
| "learning_rate": 3e-06, | |
| "loss": -3.1223, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.21831111111111112, | |
| "grad_norm": 510.4330139160156, | |
| "learning_rate": 3e-06, | |
| "loss": -15.683, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.2184, | |
| "grad_norm": 553.461669921875, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2699, | |
| "step": 2457 | |
| }, | |
| { | |
| "epoch": 0.2184888888888889, | |
| "grad_norm": 526.62109375, | |
| "learning_rate": 3e-06, | |
| "loss": -6.1042, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.2185777777777778, | |
| "grad_norm": 562.5404052734375, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4739, | |
| "step": 2459 | |
| }, | |
| { | |
| "epoch": 0.21866666666666668, | |
| "grad_norm": 544.2666625976562, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7032, | |
| "step": 2460 | |
| }, | |
| { | |
| "completion_length": 246.02083587646484, | |
| "epoch": 0.21875555555555556, | |
| "grad_norm": 1100.9122314453125, | |
| "learning_rate": 3e-06, | |
| "loss": 12.7639, | |
| "reward": 1.8750001192092896, | |
| "reward_std": 0.3061862289905548, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 1.1250000298023224, | |
| "step": 2461, | |
| "zero_std_ratio": 0.625 | |
| }, | |
| { | |
| "epoch": 0.21884444444444445, | |
| "grad_norm": 897.7620239257812, | |
| "learning_rate": 3e-06, | |
| "loss": 20.9614, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.21893333333333334, | |
| "grad_norm": 986.5072021484375, | |
| "learning_rate": 3e-06, | |
| "loss": 42.7016, | |
| "step": 2463 | |
| }, | |
| { | |
| "epoch": 0.21902222222222223, | |
| "grad_norm": 1005.2491455078125, | |
| "learning_rate": 3e-06, | |
| "loss": 59.914, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.21911111111111112, | |
| "grad_norm": 723.766357421875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.4072, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.2192, | |
| "grad_norm": 814.1770629882812, | |
| "learning_rate": 3e-06, | |
| "loss": 56.8266, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.2192888888888889, | |
| "grad_norm": 880.131591796875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9147, | |
| "step": 2467 | |
| }, | |
| { | |
| "epoch": 0.21937777777777778, | |
| "grad_norm": 1002.6812744140625, | |
| "learning_rate": 3e-06, | |
| "loss": 10.1945, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.21946666666666667, | |
| "grad_norm": 928.541748046875, | |
| "learning_rate": 3e-06, | |
| "loss": 28.8268, | |
| "step": 2469 | |
| }, | |
| { | |
| "epoch": 0.21955555555555556, | |
| "grad_norm": 1081.3568115234375, | |
| "learning_rate": 3e-06, | |
| "loss": 42.3389, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.21964444444444445, | |
| "grad_norm": 849.524658203125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.4933, | |
| "step": 2471 | |
| }, | |
| { | |
| "epoch": 0.21973333333333334, | |
| "grad_norm": 800.930419921875, | |
| "learning_rate": 3e-06, | |
| "loss": 48.021, | |
| "step": 2472 | |
| }, | |
| { | |
| "completion_length": 243.7291717529297, | |
| "epoch": 0.21982222222222222, | |
| "grad_norm": 840.5211181640625, | |
| "learning_rate": 3e-06, | |
| "loss": -29.1071, | |
| "reward": 1.4062500596046448, | |
| "reward_std": 0.47030356526374817, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.7395833432674408, | |
| "rewards/correctness_reward_func_math": 0.6666666567325592, | |
| "step": 2473, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.2199111111111111, | |
| "grad_norm": 766.2994384765625, | |
| "learning_rate": 3e-06, | |
| "loss": -22.7254, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1315.990966796875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9843, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.2200888888888889, | |
| "grad_norm": 823.3301391601562, | |
| "learning_rate": 3e-06, | |
| "loss": -45.7856, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.22017777777777778, | |
| "grad_norm": 969.9705810546875, | |
| "learning_rate": 3e-06, | |
| "loss": -26.3312, | |
| "step": 2477 | |
| }, | |
| { | |
| "epoch": 0.22026666666666667, | |
| "grad_norm": 1052.6732177734375, | |
| "learning_rate": 3e-06, | |
| "loss": -6.0265, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.22035555555555555, | |
| "grad_norm": 810.92041015625, | |
| "learning_rate": 3e-06, | |
| "loss": -35.8816, | |
| "step": 2479 | |
| }, | |
| { | |
| "epoch": 0.22044444444444444, | |
| "grad_norm": 716.6881713867188, | |
| "learning_rate": 3e-06, | |
| "loss": -34.8377, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.22053333333333333, | |
| "grad_norm": 1079.2554931640625, | |
| "learning_rate": 3e-06, | |
| "loss": -13.4226, | |
| "step": 2481 | |
| }, | |
| { | |
| "epoch": 0.22062222222222222, | |
| "grad_norm": 770.211669921875, | |
| "learning_rate": 3e-06, | |
| "loss": -50.3465, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.2207111111111111, | |
| "grad_norm": 886.2747802734375, | |
| "learning_rate": 3e-06, | |
| "loss": -33.0956, | |
| "step": 2483 | |
| }, | |
| { | |
| "epoch": 0.2208, | |
| "grad_norm": 890.3179321289062, | |
| "learning_rate": 3e-06, | |
| "loss": -15.2544, | |
| "step": 2484 | |
| }, | |
| { | |
| "completion_length": 253.58333587646484, | |
| "epoch": 0.22088888888888888, | |
| "grad_norm": 303.0287780761719, | |
| "learning_rate": 3e-06, | |
| "loss": -15.111, | |
| "reward": 1.2916666865348816, | |
| "reward_std": 0.10206207633018494, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.75, | |
| "rewards/correctness_reward_func_math": 0.5416666567325592, | |
| "step": 2485, | |
| "zero_std_ratio": 0.875 | |
| }, | |
| { | |
| "epoch": 0.22097777777777777, | |
| "grad_norm": 377.69757080078125, | |
| "learning_rate": 3e-06, | |
| "loss": -16.8297, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.22106666666666666, | |
| "grad_norm": 388.11773681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -14.6188, | |
| "step": 2487 | |
| }, | |
| { | |
| "epoch": 0.22115555555555555, | |
| "grad_norm": 291.3501892089844, | |
| "learning_rate": 3e-06, | |
| "loss": -13.4672, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.22124444444444444, | |
| "grad_norm": 439.7605285644531, | |
| "learning_rate": 3e-06, | |
| "loss": -23.8357, | |
| "step": 2489 | |
| }, | |
| { | |
| "epoch": 0.22133333333333333, | |
| "grad_norm": 379.401611328125, | |
| "learning_rate": 3e-06, | |
| "loss": -9.9997, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.22142222222222221, | |
| "grad_norm": 297.3055725097656, | |
| "learning_rate": 3e-06, | |
| "loss": -18.6912, | |
| "step": 2491 | |
| }, | |
| { | |
| "epoch": 0.2215111111111111, | |
| "grad_norm": 412.62890625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.8417, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.2216, | |
| "grad_norm": 300.6817321777344, | |
| "learning_rate": 3e-06, | |
| "loss": -19.9718, | |
| "step": 2493 | |
| }, | |
| { | |
| "epoch": 0.22168888888888888, | |
| "grad_norm": 268.96551513671875, | |
| "learning_rate": 3e-06, | |
| "loss": -18.9416, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.22177777777777777, | |
| "grad_norm": 426.9893798828125, | |
| "learning_rate": 3e-06, | |
| "loss": -27.8923, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.22186666666666666, | |
| "grad_norm": 381.15704345703125, | |
| "learning_rate": 3e-06, | |
| "loss": -19.2658, | |
| "step": 2496 | |
| }, | |
| { | |
| "completion_length": 249.95833587646484, | |
| "epoch": 0.22195555555555554, | |
| "grad_norm": 1263.931640625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4574, | |
| "reward": 1.1875, | |
| "reward_std": 0.46232303977012634, | |
| "rewards/boxed_and_answer_tags_format_reward": 0.6875, | |
| "rewards/correctness_reward_func_math": 0.5, | |
| "step": 2497, | |
| "zero_std_ratio": 0.5 | |
| }, | |
| { | |
| "epoch": 0.22204444444444443, | |
| "grad_norm": 963.8367919921875, | |
| "learning_rate": 3e-06, | |
| "loss": 15.1327, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.22213333333333332, | |
| "grad_norm": 1354.34619140625, | |
| "learning_rate": 3e-06, | |
| "loss": -37.5826, | |
| "step": 2499 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 1030.41650390625, | |
| "learning_rate": 3e-06, | |
| "loss": 3.9406, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 112500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |