Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use cuong1692001/Terminal-data_processing with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cuong1692001/Terminal-data_processing with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="cuong1692001/Terminal-data_processing") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("cuong1692001/Terminal-data_processing") model = AutoModelForCausalLM.from_pretrained("cuong1692001/Terminal-data_processing") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use cuong1692001/Terminal-data_processing with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "cuong1692001/Terminal-data_processing" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cuong1692001/Terminal-data_processing", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/cuong1692001/Terminal-data_processing
- SGLang
How to use cuong1692001/Terminal-data_processing with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "cuong1692001/Terminal-data_processing" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cuong1692001/Terminal-data_processing", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "cuong1692001/Terminal-data_processing" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "cuong1692001/Terminal-data_processing", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use cuong1692001/Terminal-data_processing with Docker Model Runner:
docker model run hf.co/cuong1692001/Terminal-data_processing
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 2322, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004306632213608958, | |
| "grad_norm": 1.5113693870546838, | |
| "learning_rate": 9.999926779237713e-06, | |
| "loss": 0.7518440246582031, | |
| "num_input_tokens_seen": 516464, | |
| "step": 5, | |
| "train_runtime": 80.3069, | |
| "train_tokens_per_second": 6431.131 | |
| }, | |
| { | |
| "epoch": 0.008613264427217916, | |
| "grad_norm": 1.1099479933103011, | |
| "learning_rate": 9.999629323566323e-06, | |
| "loss": 0.5895246028900146, | |
| "num_input_tokens_seen": 1050992, | |
| "step": 10, | |
| "train_runtime": 156.0878, | |
| "train_tokens_per_second": 6733.34 | |
| }, | |
| { | |
| "epoch": 0.012919896640826873, | |
| "grad_norm": 0.9566198183888653, | |
| "learning_rate": 9.999103070290155e-06, | |
| "loss": 0.5217817306518555, | |
| "num_input_tokens_seen": 1591440, | |
| "step": 15, | |
| "train_runtime": 221.4671, | |
| "train_tokens_per_second": 7185.897 | |
| }, | |
| { | |
| "epoch": 0.017226528854435832, | |
| "grad_norm": 0.8711180504651662, | |
| "learning_rate": 9.998348043492093e-06, | |
| "loss": 0.5254570960998535, | |
| "num_input_tokens_seen": 2104328, | |
| "step": 20, | |
| "train_runtime": 281.0116, | |
| "train_tokens_per_second": 7488.403 | |
| }, | |
| { | |
| "epoch": 0.02153316106804479, | |
| "grad_norm": 0.7568221566958359, | |
| "learning_rate": 9.997364277724362e-06, | |
| "loss": 0.49294347763061525, | |
| "num_input_tokens_seen": 2659352, | |
| "step": 25, | |
| "train_runtime": 342.2303, | |
| "train_tokens_per_second": 7770.65 | |
| }, | |
| { | |
| "epoch": 0.025839793281653745, | |
| "grad_norm": 1.110633668053048, | |
| "learning_rate": 9.996151818006951e-06, | |
| "loss": 0.4858407020568848, | |
| "num_input_tokens_seen": 3199536, | |
| "step": 30, | |
| "train_runtime": 411.1034, | |
| "train_tokens_per_second": 7782.802 | |
| }, | |
| { | |
| "epoch": 0.030146425495262703, | |
| "grad_norm": 1.307486068113022, | |
| "learning_rate": 9.99471071982555e-06, | |
| "loss": 0.4974493980407715, | |
| "num_input_tokens_seen": 3665456, | |
| "step": 35, | |
| "train_runtime": 477.4255, | |
| "train_tokens_per_second": 7677.545 | |
| }, | |
| { | |
| "epoch": 0.034453057708871665, | |
| "grad_norm": 0.7354991482453888, | |
| "learning_rate": 9.993041049129005e-06, | |
| "loss": 0.4639857292175293, | |
| "num_input_tokens_seen": 4181744, | |
| "step": 40, | |
| "train_runtime": 548.6431, | |
| "train_tokens_per_second": 7621.974 | |
| }, | |
| { | |
| "epoch": 0.03875968992248062, | |
| "grad_norm": 0.6823446795404668, | |
| "learning_rate": 9.99114288232632e-06, | |
| "loss": 0.4471879482269287, | |
| "num_input_tokens_seen": 4725304, | |
| "step": 45, | |
| "train_runtime": 625.6381, | |
| "train_tokens_per_second": 7552.775 | |
| }, | |
| { | |
| "epoch": 0.04306632213608958, | |
| "grad_norm": 0.6187958236434659, | |
| "learning_rate": 9.989016306283132e-06, | |
| "loss": 0.4194998264312744, | |
| "num_input_tokens_seen": 5217928, | |
| "step": 50, | |
| "train_runtime": 695.1978, | |
| "train_tokens_per_second": 7505.674 | |
| }, | |
| { | |
| "epoch": 0.047372954349698536, | |
| "grad_norm": 0.7217736924185857, | |
| "learning_rate": 9.986661418317759e-06, | |
| "loss": 0.4558732509613037, | |
| "num_input_tokens_seen": 5720264, | |
| "step": 55, | |
| "train_runtime": 762.7465, | |
| "train_tokens_per_second": 7499.561 | |
| }, | |
| { | |
| "epoch": 0.05167958656330749, | |
| "grad_norm": 1.1046030739670325, | |
| "learning_rate": 9.984078326196736e-06, | |
| "loss": 0.4496774673461914, | |
| "num_input_tokens_seen": 6200784, | |
| "step": 60, | |
| "train_runtime": 829.9567, | |
| "train_tokens_per_second": 7471.213 | |
| }, | |
| { | |
| "epoch": 0.05598621877691645, | |
| "grad_norm": 0.7074096009153457, | |
| "learning_rate": 9.981267148129884e-06, | |
| "loss": 0.4551366329193115, | |
| "num_input_tokens_seen": 6721120, | |
| "step": 65, | |
| "train_runtime": 901.8675, | |
| "train_tokens_per_second": 7452.448 | |
| }, | |
| { | |
| "epoch": 0.06029285099052541, | |
| "grad_norm": 0.6980358820573814, | |
| "learning_rate": 9.978228012764904e-06, | |
| "loss": 0.43771700859069823, | |
| "num_input_tokens_seen": 7202096, | |
| "step": 70, | |
| "train_runtime": 965.2084, | |
| "train_tokens_per_second": 7461.7 | |
| }, | |
| { | |
| "epoch": 0.06459948320413436, | |
| "grad_norm": 0.70967612822, | |
| "learning_rate": 9.974961059181482e-06, | |
| "loss": 0.44599485397338867, | |
| "num_input_tokens_seen": 7721720, | |
| "step": 75, | |
| "train_runtime": 1035.2088, | |
| "train_tokens_per_second": 7459.094 | |
| }, | |
| { | |
| "epoch": 0.06890611541774333, | |
| "grad_norm": 0.8195531593273334, | |
| "learning_rate": 9.971466436884933e-06, | |
| "loss": 0.4492579460144043, | |
| "num_input_tokens_seen": 8215480, | |
| "step": 80, | |
| "train_runtime": 1100.699, | |
| "train_tokens_per_second": 7463.875 | |
| }, | |
| { | |
| "epoch": 0.07321274763135228, | |
| "grad_norm": 0.6611353263896688, | |
| "learning_rate": 9.967744305799358e-06, | |
| "loss": 0.428343677520752, | |
| "num_input_tokens_seen": 8751608, | |
| "step": 85, | |
| "train_runtime": 1175.1956, | |
| "train_tokens_per_second": 7446.937 | |
| }, | |
| { | |
| "epoch": 0.07751937984496124, | |
| "grad_norm": 0.7344023965501802, | |
| "learning_rate": 9.963794836260316e-06, | |
| "loss": 0.44635858535766604, | |
| "num_input_tokens_seen": 9220400, | |
| "step": 90, | |
| "train_runtime": 1249.0723, | |
| "train_tokens_per_second": 7381.798 | |
| }, | |
| { | |
| "epoch": 0.0818260120585702, | |
| "grad_norm": 0.7993384059251473, | |
| "learning_rate": 9.959618209007045e-06, | |
| "loss": 0.41611676216125487, | |
| "num_input_tokens_seen": 9752480, | |
| "step": 95, | |
| "train_runtime": 1328.8505, | |
| "train_tokens_per_second": 7339.035 | |
| }, | |
| { | |
| "epoch": 0.08613264427217916, | |
| "grad_norm": 0.692666755511038, | |
| "learning_rate": 9.955214615174174e-06, | |
| "loss": 0.4301795959472656, | |
| "num_input_tokens_seen": 10286000, | |
| "step": 100, | |
| "train_runtime": 1397.8403, | |
| "train_tokens_per_second": 7358.494 | |
| }, | |
| { | |
| "epoch": 0.09043927648578812, | |
| "grad_norm": 0.8306336622178832, | |
| "learning_rate": 9.950584256282988e-06, | |
| "loss": 0.42357568740844725, | |
| "num_input_tokens_seen": 10786992, | |
| "step": 105, | |
| "train_runtime": 1465.2741, | |
| "train_tokens_per_second": 7361.757 | |
| }, | |
| { | |
| "epoch": 0.09474590869939707, | |
| "grad_norm": 0.7246154301219507, | |
| "learning_rate": 9.945727344232202e-06, | |
| "loss": 0.43195395469665526, | |
| "num_input_tokens_seen": 11324728, | |
| "step": 110, | |
| "train_runtime": 1543.9809, | |
| "train_tokens_per_second": 7334.759 | |
| }, | |
| { | |
| "epoch": 0.09905254091300603, | |
| "grad_norm": 0.7133273160650513, | |
| "learning_rate": 9.940644101288259e-06, | |
| "loss": 0.4098663806915283, | |
| "num_input_tokens_seen": 11842384, | |
| "step": 115, | |
| "train_runtime": 1610.1312, | |
| "train_tokens_per_second": 7354.919 | |
| }, | |
| { | |
| "epoch": 0.10335917312661498, | |
| "grad_norm": 0.7454776319580722, | |
| "learning_rate": 9.93533476007517e-06, | |
| "loss": 0.4260974884033203, | |
| "num_input_tokens_seen": 12395312, | |
| "step": 120, | |
| "train_runtime": 1679.6709, | |
| "train_tokens_per_second": 7379.607 | |
| }, | |
| { | |
| "epoch": 0.10766580534022395, | |
| "grad_norm": 0.7881403926209154, | |
| "learning_rate": 9.929799563563858e-06, | |
| "loss": 0.41834230422973634, | |
| "num_input_tokens_seen": 12876440, | |
| "step": 125, | |
| "train_runtime": 1748.4163, | |
| "train_tokens_per_second": 7364.63 | |
| }, | |
| { | |
| "epoch": 0.1119724375538329, | |
| "grad_norm": 0.7096308914101791, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 0.4106014251708984, | |
| "num_input_tokens_seen": 13376496, | |
| "step": 130, | |
| "train_runtime": 1818.2885, | |
| "train_tokens_per_second": 7356.641 | |
| }, | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 0.7276233917576757, | |
| "learning_rate": 9.918052628197645e-06, | |
| "loss": 0.4307220458984375, | |
| "num_input_tokens_seen": 13872280, | |
| "step": 135, | |
| "train_runtime": 1879.0083, | |
| "train_tokens_per_second": 7382.767 | |
| }, | |
| { | |
| "epoch": 0.12058570198105081, | |
| "grad_norm": 0.6304682702555895, | |
| "learning_rate": 9.911841426916733e-06, | |
| "loss": 0.4186269760131836, | |
| "num_input_tokens_seen": 14373496, | |
| "step": 140, | |
| "train_runtime": 1948.9705, | |
| "train_tokens_per_second": 7374.917 | |
| }, | |
| { | |
| "epoch": 0.12489233419465978, | |
| "grad_norm": 0.7457639995354279, | |
| "learning_rate": 9.905405445460972e-06, | |
| "loss": 0.41518564224243165, | |
| "num_input_tokens_seen": 14886552, | |
| "step": 145, | |
| "train_runtime": 2013.9467, | |
| "train_tokens_per_second": 7391.731 | |
| }, | |
| { | |
| "epoch": 0.12919896640826872, | |
| "grad_norm": 0.7089829747760644, | |
| "learning_rate": 9.898744978359627e-06, | |
| "loss": 0.40087127685546875, | |
| "num_input_tokens_seen": 15364928, | |
| "step": 150, | |
| "train_runtime": 2080.6533, | |
| "train_tokens_per_second": 7384.665 | |
| }, | |
| { | |
| "epoch": 0.1335055986218777, | |
| "grad_norm": 0.7908692602439755, | |
| "learning_rate": 9.891860330415077e-06, | |
| "loss": 0.4161245346069336, | |
| "num_input_tokens_seen": 15877064, | |
| "step": 155, | |
| "train_runtime": 2148.1134, | |
| "train_tokens_per_second": 7391.166 | |
| }, | |
| { | |
| "epoch": 0.13781223083548666, | |
| "grad_norm": 0.7336628341974487, | |
| "learning_rate": 9.884751816688873e-06, | |
| "loss": 0.38726229667663575, | |
| "num_input_tokens_seen": 16362424, | |
| "step": 160, | |
| "train_runtime": 2219.2408, | |
| "train_tokens_per_second": 7372.983 | |
| }, | |
| { | |
| "epoch": 0.1421188630490956, | |
| "grad_norm": 0.7377502319782788, | |
| "learning_rate": 9.877419762487312e-06, | |
| "loss": 0.41555061340332033, | |
| "num_input_tokens_seen": 16884272, | |
| "step": 165, | |
| "train_runtime": 2285.0222, | |
| "train_tokens_per_second": 7389.106 | |
| }, | |
| { | |
| "epoch": 0.14642549526270457, | |
| "grad_norm": 0.763519054939035, | |
| "learning_rate": 9.869864503346557e-06, | |
| "loss": 0.42660012245178225, | |
| "num_input_tokens_seen": 17395904, | |
| "step": 170, | |
| "train_runtime": 2353.6081, | |
| "train_tokens_per_second": 7391.164 | |
| }, | |
| { | |
| "epoch": 0.1507321274763135, | |
| "grad_norm": 0.9454978823437594, | |
| "learning_rate": 9.862086385017283e-06, | |
| "loss": 0.3985039472579956, | |
| "num_input_tokens_seen": 17935128, | |
| "step": 175, | |
| "train_runtime": 2425.5746, | |
| "train_tokens_per_second": 7394.177 | |
| }, | |
| { | |
| "epoch": 0.15503875968992248, | |
| "grad_norm": 0.897983992254847, | |
| "learning_rate": 9.854085763448844e-06, | |
| "loss": 0.403258228302002, | |
| "num_input_tokens_seen": 18441240, | |
| "step": 180, | |
| "train_runtime": 2491.2198, | |
| "train_tokens_per_second": 7402.494 | |
| }, | |
| { | |
| "epoch": 0.15934539190353145, | |
| "grad_norm": 0.7028674441116541, | |
| "learning_rate": 9.845863004772994e-06, | |
| "loss": 0.4074747085571289, | |
| "num_input_tokens_seen": 18950248, | |
| "step": 185, | |
| "train_runtime": 2562.4101, | |
| "train_tokens_per_second": 7395.478 | |
| }, | |
| { | |
| "epoch": 0.1636520241171404, | |
| "grad_norm": 0.6890074181556956, | |
| "learning_rate": 9.837418485287126e-06, | |
| "loss": 0.3952253580093384, | |
| "num_input_tokens_seen": 19443272, | |
| "step": 190, | |
| "train_runtime": 2629.3175, | |
| "train_tokens_per_second": 7394.798 | |
| }, | |
| { | |
| "epoch": 0.16795865633074936, | |
| "grad_norm": 0.6864055327668929, | |
| "learning_rate": 9.82875259143706e-06, | |
| "loss": 0.4341114521026611, | |
| "num_input_tokens_seen": 19974056, | |
| "step": 195, | |
| "train_runtime": 2701.8046, | |
| "train_tokens_per_second": 7392.857 | |
| }, | |
| { | |
| "epoch": 0.17226528854435832, | |
| "grad_norm": 0.7549452956748491, | |
| "learning_rate": 9.819865719799344e-06, | |
| "loss": 0.39603476524353026, | |
| "num_input_tokens_seen": 20505304, | |
| "step": 200, | |
| "train_runtime": 2760.8615, | |
| "train_tokens_per_second": 7427.14 | |
| }, | |
| { | |
| "epoch": 0.17657192075796727, | |
| "grad_norm": 0.7461369795730732, | |
| "learning_rate": 9.81075827706312e-06, | |
| "loss": 0.3957651615142822, | |
| "num_input_tokens_seen": 21041488, | |
| "step": 205, | |
| "train_runtime": 2833.0985, | |
| "train_tokens_per_second": 7427.023 | |
| }, | |
| { | |
| "epoch": 0.18087855297157623, | |
| "grad_norm": 0.6726317666151067, | |
| "learning_rate": 9.801430680011499e-06, | |
| "loss": 0.3961822509765625, | |
| "num_input_tokens_seen": 21525384, | |
| "step": 210, | |
| "train_runtime": 2901.9507, | |
| "train_tokens_per_second": 7417.557 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 0.7153134316038106, | |
| "learning_rate": 9.791883355502503e-06, | |
| "loss": 0.42037005424499513, | |
| "num_input_tokens_seen": 22015656, | |
| "step": 215, | |
| "train_runtime": 2972.2855, | |
| "train_tokens_per_second": 7406.979 | |
| }, | |
| { | |
| "epoch": 0.18949181739879414, | |
| "grad_norm": 0.7441983537016906, | |
| "learning_rate": 9.782116740449515e-06, | |
| "loss": 0.3965612888336182, | |
| "num_input_tokens_seen": 22529696, | |
| "step": 220, | |
| "train_runtime": 3044.2115, | |
| "train_tokens_per_second": 7400.831 | |
| }, | |
| { | |
| "epoch": 0.1937984496124031, | |
| "grad_norm": 0.7350628011367314, | |
| "learning_rate": 9.7721312818013e-06, | |
| "loss": 0.44352130889892577, | |
| "num_input_tokens_seen": 23038360, | |
| "step": 225, | |
| "train_runtime": 3116.1306, | |
| "train_tokens_per_second": 7393.259 | |
| }, | |
| { | |
| "epoch": 0.19810508182601205, | |
| "grad_norm": 0.7563228965004609, | |
| "learning_rate": 9.761927436521534e-06, | |
| "loss": 0.38213863372802737, | |
| "num_input_tokens_seen": 23532864, | |
| "step": 230, | |
| "train_runtime": 3179.4752, | |
| "train_tokens_per_second": 7401.493 | |
| }, | |
| { | |
| "epoch": 0.20241171403962102, | |
| "grad_norm": 0.775450865404374, | |
| "learning_rate": 9.751505671567914e-06, | |
| "loss": 0.3937716484069824, | |
| "num_input_tokens_seen": 24005936, | |
| "step": 235, | |
| "train_runtime": 3253.9708, | |
| "train_tokens_per_second": 7377.428 | |
| }, | |
| { | |
| "epoch": 0.20671834625322996, | |
| "grad_norm": 0.7250225416398999, | |
| "learning_rate": 9.740866463870762e-06, | |
| "loss": 0.40571026802062987, | |
| "num_input_tokens_seen": 24490872, | |
| "step": 240, | |
| "train_runtime": 3322.9821, | |
| "train_tokens_per_second": 7370.149 | |
| }, | |
| { | |
| "epoch": 0.21102497846683893, | |
| "grad_norm": 0.7114153264209351, | |
| "learning_rate": 9.730010300311226e-06, | |
| "loss": 0.411739444732666, | |
| "num_input_tokens_seen": 25014232, | |
| "step": 245, | |
| "train_runtime": 3392.0309, | |
| "train_tokens_per_second": 7374.412 | |
| }, | |
| { | |
| "epoch": 0.2153316106804479, | |
| "grad_norm": 0.7013109032284867, | |
| "learning_rate": 9.718937677698976e-06, | |
| "loss": 0.40146331787109374, | |
| "num_input_tokens_seen": 25507448, | |
| "step": 250, | |
| "train_runtime": 3457.5164, | |
| "train_tokens_per_second": 7377.39 | |
| }, | |
| { | |
| "epoch": 0.21963824289405684, | |
| "grad_norm": 0.6984434047541229, | |
| "learning_rate": 9.707649102749488e-06, | |
| "loss": 0.3829845428466797, | |
| "num_input_tokens_seen": 26003584, | |
| "step": 255, | |
| "train_runtime": 3526.0072, | |
| "train_tokens_per_second": 7374.796 | |
| }, | |
| { | |
| "epoch": 0.2239448751076658, | |
| "grad_norm": 0.6930588317334605, | |
| "learning_rate": 9.696145092060843e-06, | |
| "loss": 0.37958288192749023, | |
| "num_input_tokens_seen": 26523584, | |
| "step": 260, | |
| "train_runtime": 3586.5787, | |
| "train_tokens_per_second": 7395.233 | |
| }, | |
| { | |
| "epoch": 0.22825150732127478, | |
| "grad_norm": 0.7883661370197577, | |
| "learning_rate": 9.684426172090084e-06, | |
| "loss": 0.4073479652404785, | |
| "num_input_tokens_seen": 27002040, | |
| "step": 265, | |
| "train_runtime": 3663.8397, | |
| "train_tokens_per_second": 7369.875 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 0.6795095661344687, | |
| "learning_rate": 9.672492879129136e-06, | |
| "loss": 0.40698766708374023, | |
| "num_input_tokens_seen": 27505176, | |
| "step": 270, | |
| "train_runtime": 3735.7865, | |
| "train_tokens_per_second": 7362.62 | |
| }, | |
| { | |
| "epoch": 0.2368647717484927, | |
| "grad_norm": 0.9228740109209783, | |
| "learning_rate": 9.660345759280254e-06, | |
| "loss": 0.4029866695404053, | |
| "num_input_tokens_seen": 28022920, | |
| "step": 275, | |
| "train_runtime": 3801.2628, | |
| "train_tokens_per_second": 7372.003 | |
| }, | |
| { | |
| "epoch": 0.24117140396210163, | |
| "grad_norm": 1.0690229200759769, | |
| "learning_rate": 9.647985368431031e-06, | |
| "loss": 0.4014443397521973, | |
| "num_input_tokens_seen": 28534944, | |
| "step": 280, | |
| "train_runtime": 3879.684, | |
| "train_tokens_per_second": 7354.966 | |
| }, | |
| { | |
| "epoch": 0.2454780361757106, | |
| "grad_norm": 0.7476961212143748, | |
| "learning_rate": 9.63541227222897e-06, | |
| "loss": 0.4226128101348877, | |
| "num_input_tokens_seen": 29035280, | |
| "step": 285, | |
| "train_runtime": 3948.0122, | |
| "train_tokens_per_second": 7354.405 | |
| }, | |
| { | |
| "epoch": 0.24978466838931956, | |
| "grad_norm": 0.7955697655841107, | |
| "learning_rate": 9.622627046055584e-06, | |
| "loss": 0.4024317264556885, | |
| "num_input_tokens_seen": 29597328, | |
| "step": 290, | |
| "train_runtime": 4029.1263, | |
| "train_tokens_per_second": 7345.843 | |
| }, | |
| { | |
| "epoch": 0.2540913006029285, | |
| "grad_norm": 0.6894431613335288, | |
| "learning_rate": 9.609630275000072e-06, | |
| "loss": 0.3940183162689209, | |
| "num_input_tokens_seen": 30094560, | |
| "step": 295, | |
| "train_runtime": 4100.0206, | |
| "train_tokens_per_second": 7340.1 | |
| }, | |
| { | |
| "epoch": 0.25839793281653745, | |
| "grad_norm": 0.6990387473878472, | |
| "learning_rate": 9.596422553832547e-06, | |
| "loss": 0.3797435760498047, | |
| "num_input_tokens_seen": 30628392, | |
| "step": 300, | |
| "train_runtime": 4171.3368, | |
| "train_tokens_per_second": 7342.584 | |
| }, | |
| { | |
| "epoch": 0.26270456503014644, | |
| "grad_norm": 0.740391991800684, | |
| "learning_rate": 9.583004486976813e-06, | |
| "loss": 0.3960827350616455, | |
| "num_input_tokens_seen": 31155408, | |
| "step": 305, | |
| "train_runtime": 4239.711, | |
| "train_tokens_per_second": 7348.474 | |
| }, | |
| { | |
| "epoch": 0.2670111972437554, | |
| "grad_norm": 0.6842903856341336, | |
| "learning_rate": 9.5693766884827e-06, | |
| "loss": 0.38438570499420166, | |
| "num_input_tokens_seen": 31648472, | |
| "step": 310, | |
| "train_runtime": 4304.3552, | |
| "train_tokens_per_second": 7352.663 | |
| }, | |
| { | |
| "epoch": 0.2713178294573643, | |
| "grad_norm": 0.7452536578366745, | |
| "learning_rate": 9.555539781997978e-06, | |
| "loss": 0.4010166645050049, | |
| "num_input_tokens_seen": 32148440, | |
| "step": 315, | |
| "train_runtime": 4375.0353, | |
| "train_tokens_per_second": 7348.156 | |
| }, | |
| { | |
| "epoch": 0.2756244616709733, | |
| "grad_norm": 0.6599734525587321, | |
| "learning_rate": 9.541494400739799e-06, | |
| "loss": 0.3938159465789795, | |
| "num_input_tokens_seen": 32659456, | |
| "step": 320, | |
| "train_runtime": 4449.4205, | |
| "train_tokens_per_second": 7340.159 | |
| }, | |
| { | |
| "epoch": 0.27993109388458226, | |
| "grad_norm": 0.6790075507627891, | |
| "learning_rate": 9.527241187465735e-06, | |
| "loss": 0.39118094444274903, | |
| "num_input_tokens_seen": 33157552, | |
| "step": 325, | |
| "train_runtime": 4514.8539, | |
| "train_tokens_per_second": 7344.103 | |
| }, | |
| { | |
| "epoch": 0.2842377260981912, | |
| "grad_norm": 0.6400320619933816, | |
| "learning_rate": 9.51278079444435e-06, | |
| "loss": 0.38675827980041505, | |
| "num_input_tokens_seen": 33687624, | |
| "step": 330, | |
| "train_runtime": 4579.3488, | |
| "train_tokens_per_second": 7356.422 | |
| }, | |
| { | |
| "epoch": 0.2885443583118002, | |
| "grad_norm": 0.8011737268019301, | |
| "learning_rate": 9.498113883425364e-06, | |
| "loss": 0.4036139488220215, | |
| "num_input_tokens_seen": 34171792, | |
| "step": 335, | |
| "train_runtime": 4653.543, | |
| "train_tokens_per_second": 7343.177 | |
| }, | |
| { | |
| "epoch": 0.29285099052540914, | |
| "grad_norm": 0.6748924299050266, | |
| "learning_rate": 9.483241125609358e-06, | |
| "loss": 0.41157045364379885, | |
| "num_input_tokens_seen": 34691536, | |
| "step": 340, | |
| "train_runtime": 4729.7805, | |
| "train_tokens_per_second": 7334.703 | |
| }, | |
| { | |
| "epoch": 0.2971576227390181, | |
| "grad_norm": 0.6981892601742133, | |
| "learning_rate": 9.468163201617063e-06, | |
| "loss": 0.42139220237731934, | |
| "num_input_tokens_seen": 35206192, | |
| "step": 345, | |
| "train_runtime": 4801.0436, | |
| "train_tokens_per_second": 7333.029 | |
| }, | |
| { | |
| "epoch": 0.301464254952627, | |
| "grad_norm": 0.7265361698204105, | |
| "learning_rate": 9.45288080145821e-06, | |
| "loss": 0.3920948028564453, | |
| "num_input_tokens_seen": 35705960, | |
| "step": 350, | |
| "train_runtime": 4873.848, | |
| "train_tokens_per_second": 7326.031 | |
| }, | |
| { | |
| "epoch": 0.305770887166236, | |
| "grad_norm": 0.6865303232424075, | |
| "learning_rate": 9.437394624499957e-06, | |
| "loss": 0.37868556976318357, | |
| "num_input_tokens_seen": 36206960, | |
| "step": 355, | |
| "train_runtime": 4934.8248, | |
| "train_tokens_per_second": 7337.03 | |
| }, | |
| { | |
| "epoch": 0.31007751937984496, | |
| "grad_norm": 0.8147162296164123, | |
| "learning_rate": 9.421705379434887e-06, | |
| "loss": 0.3680633544921875, | |
| "num_input_tokens_seen": 36749336, | |
| "step": 360, | |
| "train_runtime": 5009.1119, | |
| "train_tokens_per_second": 7336.497 | |
| }, | |
| { | |
| "epoch": 0.3143841515934539, | |
| "grad_norm": 0.6626000206050349, | |
| "learning_rate": 9.405813784248562e-06, | |
| "loss": 0.386813759803772, | |
| "num_input_tokens_seen": 37255480, | |
| "step": 365, | |
| "train_runtime": 5069.0004, | |
| "train_tokens_per_second": 7349.67 | |
| }, | |
| { | |
| "epoch": 0.3186907838070629, | |
| "grad_norm": 0.7212410819832733, | |
| "learning_rate": 9.38972056618668e-06, | |
| "loss": 0.38484036922454834, | |
| "num_input_tokens_seen": 37760024, | |
| "step": 370, | |
| "train_runtime": 5130.9861, | |
| "train_tokens_per_second": 7359.214 | |
| }, | |
| { | |
| "epoch": 0.32299741602067183, | |
| "grad_norm": 0.6530771248339148, | |
| "learning_rate": 9.37342646172179e-06, | |
| "loss": 0.3900888919830322, | |
| "num_input_tokens_seen": 38314528, | |
| "step": 375, | |
| "train_runtime": 5198.0649, | |
| "train_tokens_per_second": 7370.921 | |
| }, | |
| { | |
| "epoch": 0.3273040482342808, | |
| "grad_norm": 0.6310362265785353, | |
| "learning_rate": 9.356932216519588e-06, | |
| "loss": 0.3763136625289917, | |
| "num_input_tokens_seen": 38827464, | |
| "step": 380, | |
| "train_runtime": 5260.6931, | |
| "train_tokens_per_second": 7380.675 | |
| }, | |
| { | |
| "epoch": 0.33161068044788977, | |
| "grad_norm": 0.6821166049126685, | |
| "learning_rate": 9.340238585404787e-06, | |
| "loss": 0.38970165252685546, | |
| "num_input_tokens_seen": 39325920, | |
| "step": 385, | |
| "train_runtime": 5334.5963, | |
| "train_tokens_per_second": 7371.864 | |
| }, | |
| { | |
| "epoch": 0.3359173126614987, | |
| "grad_norm": 0.7414418790229163, | |
| "learning_rate": 9.32334633232659e-06, | |
| "loss": 0.3639005184173584, | |
| "num_input_tokens_seen": 39832488, | |
| "step": 390, | |
| "train_runtime": 5404.3058, | |
| "train_tokens_per_second": 7370.51 | |
| }, | |
| { | |
| "epoch": 0.34022394487510765, | |
| "grad_norm": 0.6281453488939435, | |
| "learning_rate": 9.306256230323714e-06, | |
| "loss": 0.3740285873413086, | |
| "num_input_tokens_seen": 40357880, | |
| "step": 395, | |
| "train_runtime": 5475.2685, | |
| "train_tokens_per_second": 7370.941 | |
| }, | |
| { | |
| "epoch": 0.34453057708871665, | |
| "grad_norm": 0.6881409462313944, | |
| "learning_rate": 9.28896906148902e-06, | |
| "loss": 0.39269113540649414, | |
| "num_input_tokens_seen": 40888088, | |
| "step": 400, | |
| "train_runtime": 5552.5547, | |
| "train_tokens_per_second": 7363.833 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 0.7050381848889274, | |
| "learning_rate": 9.271485616933725e-06, | |
| "loss": 0.38478426933288573, | |
| "num_input_tokens_seen": 41396320, | |
| "step": 405, | |
| "train_runtime": 5622.9597, | |
| "train_tokens_per_second": 7362.016 | |
| }, | |
| { | |
| "epoch": 0.35314384151593453, | |
| "grad_norm": 0.6582656508034226, | |
| "learning_rate": 9.253806696751185e-06, | |
| "loss": 0.38239054679870604, | |
| "num_input_tokens_seen": 41883152, | |
| "step": 410, | |
| "train_runtime": 5689.082, | |
| "train_tokens_per_second": 7362.023 | |
| }, | |
| { | |
| "epoch": 0.35745047372954347, | |
| "grad_norm": 0.6625271537573424, | |
| "learning_rate": 9.235933109980302e-06, | |
| "loss": 0.39941422939300536, | |
| "num_input_tokens_seen": 42386288, | |
| "step": 415, | |
| "train_runtime": 5763.4311, | |
| "train_tokens_per_second": 7354.35 | |
| }, | |
| { | |
| "epoch": 0.36175710594315247, | |
| "grad_norm": 0.6020944056659068, | |
| "learning_rate": 9.217865674568482e-06, | |
| "loss": 0.3732919216156006, | |
| "num_input_tokens_seen": 42921232, | |
| "step": 420, | |
| "train_runtime": 5830.9366, | |
| "train_tokens_per_second": 7360.95 | |
| }, | |
| { | |
| "epoch": 0.3660637381567614, | |
| "grad_norm": 0.6612749680911271, | |
| "learning_rate": 9.19960521733421e-06, | |
| "loss": 0.36808338165283205, | |
| "num_input_tokens_seen": 43431736, | |
| "step": 425, | |
| "train_runtime": 5899.5507, | |
| "train_tokens_per_second": 7361.872 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 0.7052929015034174, | |
| "learning_rate": 9.181152573929215e-06, | |
| "loss": 0.3811118841171265, | |
| "num_input_tokens_seen": 43944248, | |
| "step": 430, | |
| "train_runtime": 5967.9506, | |
| "train_tokens_per_second": 7363.373 | |
| }, | |
| { | |
| "epoch": 0.37467700258397935, | |
| "grad_norm": 0.6443665940829485, | |
| "learning_rate": 9.162508588800221e-06, | |
| "loss": 0.40485553741455077, | |
| "num_input_tokens_seen": 44470472, | |
| "step": 435, | |
| "train_runtime": 6044.9553, | |
| "train_tokens_per_second": 7356.625 | |
| }, | |
| { | |
| "epoch": 0.3789836347975883, | |
| "grad_norm": 0.6692181607943731, | |
| "learning_rate": 9.143674115150306e-06, | |
| "loss": 0.3950349807739258, | |
| "num_input_tokens_seen": 44997504, | |
| "step": 440, | |
| "train_runtime": 6115.4287, | |
| "train_tokens_per_second": 7358.029 | |
| }, | |
| { | |
| "epoch": 0.3832902670111972, | |
| "grad_norm": 0.6656325377440188, | |
| "learning_rate": 9.124650014899868e-06, | |
| "loss": 0.3851844310760498, | |
| "num_input_tokens_seen": 45490008, | |
| "step": 445, | |
| "train_runtime": 6183.897, | |
| "train_tokens_per_second": 7356.204 | |
| }, | |
| { | |
| "epoch": 0.3875968992248062, | |
| "grad_norm": 0.6553358070624373, | |
| "learning_rate": 9.105437158647154e-06, | |
| "loss": 0.40310277938842776, | |
| "num_input_tokens_seen": 46027896, | |
| "step": 450, | |
| "train_runtime": 6243.5546, | |
| "train_tokens_per_second": 7372.066 | |
| }, | |
| { | |
| "epoch": 0.39190353143841516, | |
| "grad_norm": 0.6866680311340208, | |
| "learning_rate": 9.086036425628453e-06, | |
| "loss": 0.39217004776000974, | |
| "num_input_tokens_seen": 46543128, | |
| "step": 455, | |
| "train_runtime": 6313.3771, | |
| "train_tokens_per_second": 7372.144 | |
| }, | |
| { | |
| "epoch": 0.3962101636520241, | |
| "grad_norm": 0.642839268974144, | |
| "learning_rate": 9.066448703677828e-06, | |
| "loss": 0.3778968811035156, | |
| "num_input_tokens_seen": 47048496, | |
| "step": 460, | |
| "train_runtime": 6382.8396, | |
| "train_tokens_per_second": 7371.092 | |
| }, | |
| { | |
| "epoch": 0.4005167958656331, | |
| "grad_norm": 0.5917876909438503, | |
| "learning_rate": 9.046674889186509e-06, | |
| "loss": 0.3833005905151367, | |
| "num_input_tokens_seen": 47582216, | |
| "step": 465, | |
| "train_runtime": 6459.5385, | |
| "train_tokens_per_second": 7366.194 | |
| }, | |
| { | |
| "epoch": 0.40482342807924204, | |
| "grad_norm": 0.6249300807978534, | |
| "learning_rate": 9.026715887061858e-06, | |
| "loss": 0.38653390407562255, | |
| "num_input_tokens_seen": 48103904, | |
| "step": 470, | |
| "train_runtime": 6531.4172, | |
| "train_tokens_per_second": 7365.003 | |
| }, | |
| { | |
| "epoch": 0.409130060292851, | |
| "grad_norm": 0.6723447300315076, | |
| "learning_rate": 9.006572610685969e-06, | |
| "loss": 0.3771857738494873, | |
| "num_input_tokens_seen": 48589720, | |
| "step": 475, | |
| "train_runtime": 6599.0101, | |
| "train_tokens_per_second": 7363.183 | |
| }, | |
| { | |
| "epoch": 0.4134366925064599, | |
| "grad_norm": 0.6774904420934339, | |
| "learning_rate": 8.986245981873854e-06, | |
| "loss": 0.39016103744506836, | |
| "num_input_tokens_seen": 49120400, | |
| "step": 480, | |
| "train_runtime": 6669.6779, | |
| "train_tokens_per_second": 7364.733 | |
| }, | |
| { | |
| "epoch": 0.4177433247200689, | |
| "grad_norm": 0.6443265118405658, | |
| "learning_rate": 8.965736930831272e-06, | |
| "loss": 0.398733115196228, | |
| "num_input_tokens_seen": 49651912, | |
| "step": 485, | |
| "train_runtime": 6740.852, | |
| "train_tokens_per_second": 7365.821 | |
| }, | |
| { | |
| "epoch": 0.42204995693367786, | |
| "grad_norm": 0.6266168108513596, | |
| "learning_rate": 8.945046396112158e-06, | |
| "loss": 0.3702796697616577, | |
| "num_input_tokens_seen": 50164680, | |
| "step": 490, | |
| "train_runtime": 6819.9627, | |
| "train_tokens_per_second": 7355.565 | |
| }, | |
| { | |
| "epoch": 0.4263565891472868, | |
| "grad_norm": 0.6275664299170484, | |
| "learning_rate": 8.92417532457566e-06, | |
| "loss": 0.36701819896697996, | |
| "num_input_tokens_seen": 50674248, | |
| "step": 495, | |
| "train_runtime": 6883.5614, | |
| "train_tokens_per_second": 7361.632 | |
| }, | |
| { | |
| "epoch": 0.4306632213608958, | |
| "grad_norm": 0.7306000236240069, | |
| "learning_rate": 8.903124671342822e-06, | |
| "loss": 0.3911170959472656, | |
| "num_input_tokens_seen": 51200088, | |
| "step": 500, | |
| "train_runtime": 6955.2058, | |
| "train_tokens_per_second": 7361.405 | |
| }, | |
| { | |
| "epoch": 0.43496985357450474, | |
| "grad_norm": 0.7330422033270405, | |
| "learning_rate": 8.881895399752873e-06, | |
| "loss": 0.39371590614318847, | |
| "num_input_tokens_seen": 51742280, | |
| "step": 505, | |
| "train_runtime": 7024.383, | |
| "train_tokens_per_second": 7366.096 | |
| }, | |
| { | |
| "epoch": 0.4392764857881137, | |
| "grad_norm": 0.7263151417995986, | |
| "learning_rate": 8.86048848131913e-06, | |
| "loss": 0.3914726972579956, | |
| "num_input_tokens_seen": 52294208, | |
| "step": 510, | |
| "train_runtime": 7092.1416, | |
| "train_tokens_per_second": 7373.543 | |
| }, | |
| { | |
| "epoch": 0.4435831180017227, | |
| "grad_norm": 0.7435120437949877, | |
| "learning_rate": 8.838904895684555e-06, | |
| "loss": 0.39632158279418944, | |
| "num_input_tokens_seen": 52831024, | |
| "step": 515, | |
| "train_runtime": 7167.5571, | |
| "train_tokens_per_second": 7370.855 | |
| }, | |
| { | |
| "epoch": 0.4478897502153316, | |
| "grad_norm": 0.7138089969072026, | |
| "learning_rate": 8.81714563057691e-06, | |
| "loss": 0.38961071968078614, | |
| "num_input_tokens_seen": 53372888, | |
| "step": 520, | |
| "train_runtime": 7238.2171, | |
| "train_tokens_per_second": 7373.762 | |
| }, | |
| { | |
| "epoch": 0.45219638242894056, | |
| "grad_norm": 0.705868522029541, | |
| "learning_rate": 8.795211681763565e-06, | |
| "loss": 0.37157084941864016, | |
| "num_input_tokens_seen": 53869408, | |
| "step": 525, | |
| "train_runtime": 7309.2034, | |
| "train_tokens_per_second": 7370.079 | |
| }, | |
| { | |
| "epoch": 0.45650301464254955, | |
| "grad_norm": 0.6507976015541289, | |
| "learning_rate": 8.773104053005928e-06, | |
| "loss": 0.37537417411804197, | |
| "num_input_tokens_seen": 54368528, | |
| "step": 530, | |
| "train_runtime": 7380.1139, | |
| "train_tokens_per_second": 7366.896 | |
| }, | |
| { | |
| "epoch": 0.4608096468561585, | |
| "grad_norm": 0.6423561579091476, | |
| "learning_rate": 8.750823756013498e-06, | |
| "loss": 0.3898594379425049, | |
| "num_input_tokens_seen": 54840552, | |
| "step": 535, | |
| "train_runtime": 7445.4319, | |
| "train_tokens_per_second": 7365.664 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 0.6245224202252966, | |
| "learning_rate": 8.728371810397586e-06, | |
| "loss": 0.38840298652648925, | |
| "num_input_tokens_seen": 55337456, | |
| "step": 540, | |
| "train_runtime": 7511.6912, | |
| "train_tokens_per_second": 7366.844 | |
| }, | |
| { | |
| "epoch": 0.4694229112833764, | |
| "grad_norm": 0.6635902775699029, | |
| "learning_rate": 8.705749243624635e-06, | |
| "loss": 0.3719607353210449, | |
| "num_input_tokens_seen": 55848016, | |
| "step": 545, | |
| "train_runtime": 7574.5817, | |
| "train_tokens_per_second": 7373.083 | |
| }, | |
| { | |
| "epoch": 0.4737295434969854, | |
| "grad_norm": 0.6636142138451493, | |
| "learning_rate": 8.68295709096922e-06, | |
| "loss": 0.3457005262374878, | |
| "num_input_tokens_seen": 56307448, | |
| "step": 550, | |
| "train_runtime": 7643.8778, | |
| "train_tokens_per_second": 7366.346 | |
| }, | |
| { | |
| "epoch": 0.4780361757105943, | |
| "grad_norm": 0.6513434980018162, | |
| "learning_rate": 8.659996395466648e-06, | |
| "loss": 0.36336798667907716, | |
| "num_input_tokens_seen": 56791296, | |
| "step": 555, | |
| "train_runtime": 7708.7709, | |
| "train_tokens_per_second": 7367.101 | |
| }, | |
| { | |
| "epoch": 0.48234280792420325, | |
| "grad_norm": 0.655388134773357, | |
| "learning_rate": 8.636868207865244e-06, | |
| "loss": 0.3849674463272095, | |
| "num_input_tokens_seen": 57286592, | |
| "step": 560, | |
| "train_runtime": 7776.3092, | |
| "train_tokens_per_second": 7366.81 | |
| }, | |
| { | |
| "epoch": 0.48664944013781225, | |
| "grad_norm": 0.6158794330991001, | |
| "learning_rate": 8.613573586578262e-06, | |
| "loss": 0.3585472583770752, | |
| "num_input_tokens_seen": 57779240, | |
| "step": 565, | |
| "train_runtime": 7846.3275, | |
| "train_tokens_per_second": 7363.858 | |
| }, | |
| { | |
| "epoch": 0.4909560723514212, | |
| "grad_norm": 0.6282187190034991, | |
| "learning_rate": 8.590113597635443e-06, | |
| "loss": 0.3671374797821045, | |
| "num_input_tokens_seen": 58296520, | |
| "step": 570, | |
| "train_runtime": 7902.9671, | |
| "train_tokens_per_second": 7376.536 | |
| }, | |
| { | |
| "epoch": 0.49526270456503013, | |
| "grad_norm": 0.6820366351035635, | |
| "learning_rate": 8.56648931463423e-06, | |
| "loss": 0.3729721546173096, | |
| "num_input_tokens_seen": 58787768, | |
| "step": 575, | |
| "train_runtime": 7976.9359, | |
| "train_tokens_per_second": 7369.718 | |
| }, | |
| { | |
| "epoch": 0.49956933677863913, | |
| "grad_norm": 0.5953948188219655, | |
| "learning_rate": 8.54270181869065e-06, | |
| "loss": 0.37489094734191897, | |
| "num_input_tokens_seen": 59253688, | |
| "step": 580, | |
| "train_runtime": 8050.0837, | |
| "train_tokens_per_second": 7360.63 | |
| }, | |
| { | |
| "epoch": 0.5038759689922481, | |
| "grad_norm": 0.6536807624369786, | |
| "learning_rate": 8.518752198389823e-06, | |
| "loss": 0.3806663990020752, | |
| "num_input_tokens_seen": 59777648, | |
| "step": 585, | |
| "train_runtime": 8117.638, | |
| "train_tokens_per_second": 7363.921 | |
| }, | |
| { | |
| "epoch": 0.508182601205857, | |
| "grad_norm": 0.7077876190780552, | |
| "learning_rate": 8.494641549736152e-06, | |
| "loss": 0.35221052169799805, | |
| "num_input_tokens_seen": 60267056, | |
| "step": 590, | |
| "train_runtime": 8186.7766, | |
| "train_tokens_per_second": 7361.512 | |
| }, | |
| { | |
| "epoch": 0.512489233419466, | |
| "grad_norm": 0.6112186065184119, | |
| "learning_rate": 8.470370976103171e-06, | |
| "loss": 0.37081544399261473, | |
| "num_input_tokens_seen": 60752944, | |
| "step": 595, | |
| "train_runtime": 8248.8743, | |
| "train_tokens_per_second": 7364.998 | |
| }, | |
| { | |
| "epoch": 0.5167958656330749, | |
| "grad_norm": 0.6544431163583635, | |
| "learning_rate": 8.445941588183042e-06, | |
| "loss": 0.3763900279998779, | |
| "num_input_tokens_seen": 61249152, | |
| "step": 600, | |
| "train_runtime": 8320.3633, | |
| "train_tokens_per_second": 7361.356 | |
| }, | |
| { | |
| "epoch": 0.5211024978466839, | |
| "grad_norm": 0.6502245015430211, | |
| "learning_rate": 8.421354503935733e-06, | |
| "loss": 0.3733165740966797, | |
| "num_input_tokens_seen": 61738896, | |
| "step": 605, | |
| "train_runtime": 8389.2584, | |
| "train_tokens_per_second": 7359.279 | |
| }, | |
| { | |
| "epoch": 0.5254091300602929, | |
| "grad_norm": 0.5791570703545336, | |
| "learning_rate": 8.396610848537858e-06, | |
| "loss": 0.36464879512786863, | |
| "num_input_tokens_seen": 62285720, | |
| "step": 610, | |
| "train_runtime": 8468.6352, | |
| "train_tokens_per_second": 7354.871 | |
| }, | |
| { | |
| "epoch": 0.5297157622739018, | |
| "grad_norm": 0.6648330559819625, | |
| "learning_rate": 8.371711754331181e-06, | |
| "loss": 0.3548685073852539, | |
| "num_input_tokens_seen": 62773392, | |
| "step": 615, | |
| "train_runtime": 8538.4686, | |
| "train_tokens_per_second": 7351.833 | |
| }, | |
| { | |
| "epoch": 0.5340223944875108, | |
| "grad_norm": 0.7039435693401701, | |
| "learning_rate": 8.346658360770798e-06, | |
| "loss": 0.37767829895019533, | |
| "num_input_tokens_seen": 63297240, | |
| "step": 620, | |
| "train_runtime": 8605.0669, | |
| "train_tokens_per_second": 7355.81 | |
| }, | |
| { | |
| "epoch": 0.5383290267011197, | |
| "grad_norm": 0.7143763877497393, | |
| "learning_rate": 8.321451814372998e-06, | |
| "loss": 0.37348289489746095, | |
| "num_input_tokens_seen": 63821872, | |
| "step": 625, | |
| "train_runtime": 8674.53, | |
| "train_tokens_per_second": 7357.387 | |
| }, | |
| { | |
| "epoch": 0.5426356589147286, | |
| "grad_norm": 0.7179284363192735, | |
| "learning_rate": 8.296093268662779e-06, | |
| "loss": 0.3854733228683472, | |
| "num_input_tokens_seen": 64361320, | |
| "step": 630, | |
| "train_runtime": 8743.8704, | |
| "train_tokens_per_second": 7360.736 | |
| }, | |
| { | |
| "epoch": 0.5469422911283376, | |
| "grad_norm": 0.9509086516647082, | |
| "learning_rate": 8.270583884121083e-06, | |
| "loss": 0.3740884780883789, | |
| "num_input_tokens_seen": 64877880, | |
| "step": 635, | |
| "train_runtime": 8818.0876, | |
| "train_tokens_per_second": 7357.364 | |
| }, | |
| { | |
| "epoch": 0.5512489233419466, | |
| "grad_norm": 0.6546022698428935, | |
| "learning_rate": 8.244924828131668e-06, | |
| "loss": 0.3512680768966675, | |
| "num_input_tokens_seen": 65380760, | |
| "step": 640, | |
| "train_runtime": 8884.2528, | |
| "train_tokens_per_second": 7359.174 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 0.6415517399290378, | |
| "learning_rate": 8.219117274927696e-06, | |
| "loss": 0.36990017890930177, | |
| "num_input_tokens_seen": 65889880, | |
| "step": 645, | |
| "train_runtime": 8944.8369, | |
| "train_tokens_per_second": 7366.247 | |
| }, | |
| { | |
| "epoch": 0.5598621877691645, | |
| "grad_norm": 0.6441817326099615, | |
| "learning_rate": 8.193162405537997e-06, | |
| "loss": 0.3620732307434082, | |
| "num_input_tokens_seen": 66397256, | |
| "step": 650, | |
| "train_runtime": 9013.6144, | |
| "train_tokens_per_second": 7366.33 | |
| }, | |
| { | |
| "epoch": 0.5641688199827735, | |
| "grad_norm": 0.6686614531494821, | |
| "learning_rate": 8.167061407733018e-06, | |
| "loss": 0.37317404747009275, | |
| "num_input_tokens_seen": 66907624, | |
| "step": 655, | |
| "train_runtime": 9082.5463, | |
| "train_tokens_per_second": 7366.615 | |
| }, | |
| { | |
| "epoch": 0.5684754521963824, | |
| "grad_norm": 0.6944822176730631, | |
| "learning_rate": 8.14081547597046e-06, | |
| "loss": 0.36172118186950686, | |
| "num_input_tokens_seen": 67392080, | |
| "step": 660, | |
| "train_runtime": 9153.9802, | |
| "train_tokens_per_second": 7362.052 | |
| }, | |
| { | |
| "epoch": 0.5727820844099913, | |
| "grad_norm": 0.6266997505339615, | |
| "learning_rate": 8.114425811340635e-06, | |
| "loss": 0.36071798801422117, | |
| "num_input_tokens_seen": 67880624, | |
| "step": 665, | |
| "train_runtime": 9227.7772, | |
| "train_tokens_per_second": 7356.119 | |
| }, | |
| { | |
| "epoch": 0.5770887166236004, | |
| "grad_norm": 0.7282530987540681, | |
| "learning_rate": 8.087893621511487e-06, | |
| "loss": 0.39921371936798095, | |
| "num_input_tokens_seen": 68390984, | |
| "step": 670, | |
| "train_runtime": 9296.9161, | |
| "train_tokens_per_second": 7356.309 | |
| }, | |
| { | |
| "epoch": 0.5813953488372093, | |
| "grad_norm": 0.6426511038776365, | |
| "learning_rate": 8.061220120673323e-06, | |
| "loss": 0.38092870712280275, | |
| "num_input_tokens_seen": 68896344, | |
| "step": 675, | |
| "train_runtime": 9363.7793, | |
| "train_tokens_per_second": 7357.75 | |
| }, | |
| { | |
| "epoch": 0.5857019810508183, | |
| "grad_norm": 0.6872672773444454, | |
| "learning_rate": 8.03440652948326e-06, | |
| "loss": 0.35166177749633787, | |
| "num_input_tokens_seen": 69390728, | |
| "step": 680, | |
| "train_runtime": 9436.3566, | |
| "train_tokens_per_second": 7353.551 | |
| }, | |
| { | |
| "epoch": 0.5900086132644272, | |
| "grad_norm": 0.6663950335905633, | |
| "learning_rate": 8.007454075009352e-06, | |
| "loss": 0.36625821590423585, | |
| "num_input_tokens_seen": 69913800, | |
| "step": 685, | |
| "train_runtime": 9512.1057, | |
| "train_tokens_per_second": 7349.981 | |
| }, | |
| { | |
| "epoch": 0.5943152454780362, | |
| "grad_norm": 0.6365208018382441, | |
| "learning_rate": 7.980363990674449e-06, | |
| "loss": 0.3945136070251465, | |
| "num_input_tokens_seen": 70444064, | |
| "step": 690, | |
| "train_runtime": 9585.1462, | |
| "train_tokens_per_second": 7349.295 | |
| }, | |
| { | |
| "epoch": 0.5986218776916451, | |
| "grad_norm": 0.6592472029548292, | |
| "learning_rate": 7.953137516199737e-06, | |
| "loss": 0.36552479267120364, | |
| "num_input_tokens_seen": 70941680, | |
| "step": 695, | |
| "train_runtime": 9653.5644, | |
| "train_tokens_per_second": 7348.755 | |
| }, | |
| { | |
| "epoch": 0.602928509905254, | |
| "grad_norm": 0.6421651719984249, | |
| "learning_rate": 7.925775897548013e-06, | |
| "loss": 0.3570195198059082, | |
| "num_input_tokens_seen": 71441992, | |
| "step": 700, | |
| "train_runtime": 9720.2583, | |
| "train_tokens_per_second": 7349.804 | |
| }, | |
| { | |
| "epoch": 0.6072351421188631, | |
| "grad_norm": 0.6384747569278264, | |
| "learning_rate": 7.898280386866673e-06, | |
| "loss": 0.3764955043792725, | |
| "num_input_tokens_seen": 71945280, | |
| "step": 705, | |
| "train_runtime": 9788.1349, | |
| "train_tokens_per_second": 7350.254 | |
| }, | |
| { | |
| "epoch": 0.611541774332472, | |
| "grad_norm": 0.7114005381070436, | |
| "learning_rate": 7.87065224243039e-06, | |
| "loss": 0.38172245025634766, | |
| "num_input_tokens_seen": 72420368, | |
| "step": 710, | |
| "train_runtime": 9855.659, | |
| "train_tokens_per_second": 7348.1 | |
| }, | |
| { | |
| "epoch": 0.615848406546081, | |
| "grad_norm": 0.6215545093138584, | |
| "learning_rate": 7.842892728583557e-06, | |
| "loss": 0.3583367824554443, | |
| "num_input_tokens_seen": 72937952, | |
| "step": 715, | |
| "train_runtime": 9916.7096, | |
| "train_tokens_per_second": 7355.056 | |
| }, | |
| { | |
| "epoch": 0.6201550387596899, | |
| "grad_norm": 0.6932264510907867, | |
| "learning_rate": 7.815003115682405e-06, | |
| "loss": 0.3677781581878662, | |
| "num_input_tokens_seen": 73453368, | |
| "step": 720, | |
| "train_runtime": 9987.9385, | |
| "train_tokens_per_second": 7354.207 | |
| }, | |
| { | |
| "epoch": 0.6244616709732989, | |
| "grad_norm": 0.6755500942949265, | |
| "learning_rate": 7.78698468003688e-06, | |
| "loss": 0.36302263736724855, | |
| "num_input_tokens_seen": 73959264, | |
| "step": 725, | |
| "train_runtime": 10054.9379, | |
| "train_tokens_per_second": 7355.517 | |
| }, | |
| { | |
| "epoch": 0.6287683031869078, | |
| "grad_norm": 0.6952999463715118, | |
| "learning_rate": 7.75883870385223e-06, | |
| "loss": 0.37976875305175783, | |
| "num_input_tokens_seen": 74444936, | |
| "step": 730, | |
| "train_runtime": 10131.0665, | |
| "train_tokens_per_second": 7348.184 | |
| }, | |
| { | |
| "epoch": 0.6330749354005168, | |
| "grad_norm": 0.735886603640982, | |
| "learning_rate": 7.730566475170334e-06, | |
| "loss": 0.3730980157852173, | |
| "num_input_tokens_seen": 74962952, | |
| "step": 735, | |
| "train_runtime": 10196.8779, | |
| "train_tokens_per_second": 7351.559 | |
| }, | |
| { | |
| "epoch": 0.6373815676141258, | |
| "grad_norm": 0.6824970423272824, | |
| "learning_rate": 7.702169287810751e-06, | |
| "loss": 0.3567670345306396, | |
| "num_input_tokens_seen": 75451216, | |
| "step": 740, | |
| "train_runtime": 10261.135, | |
| "train_tokens_per_second": 7353.106 | |
| }, | |
| { | |
| "epoch": 0.6416881998277347, | |
| "grad_norm": 0.567176098283514, | |
| "learning_rate": 7.67364844131151e-06, | |
| "loss": 0.3866007328033447, | |
| "num_input_tokens_seen": 76008248, | |
| "step": 745, | |
| "train_runtime": 10322.9684, | |
| "train_tokens_per_second": 7363.022 | |
| }, | |
| { | |
| "epoch": 0.6459948320413437, | |
| "grad_norm": 0.6031679181986589, | |
| "learning_rate": 7.645005240869644e-06, | |
| "loss": 0.3532373905181885, | |
| "num_input_tokens_seen": 76529328, | |
| "step": 750, | |
| "train_runtime": 10389.7584, | |
| "train_tokens_per_second": 7365.843 | |
| }, | |
| { | |
| "epoch": 0.6503014642549526, | |
| "grad_norm": 0.7159571723969788, | |
| "learning_rate": 7.616240997281465e-06, | |
| "loss": 0.3541311502456665, | |
| "num_input_tokens_seen": 77047776, | |
| "step": 755, | |
| "train_runtime": 10466.7096, | |
| "train_tokens_per_second": 7361.222 | |
| }, | |
| { | |
| "epoch": 0.6546080964685616, | |
| "grad_norm": 0.634378436444587, | |
| "learning_rate": 7.587357026882563e-06, | |
| "loss": 0.36530122756958006, | |
| "num_input_tokens_seen": 77585376, | |
| "step": 760, | |
| "train_runtime": 10541.6333, | |
| "train_tokens_per_second": 7359.901 | |
| }, | |
| { | |
| "epoch": 0.6589147286821705, | |
| "grad_norm": 0.6298702094677439, | |
| "learning_rate": 7.558354651487583e-06, | |
| "loss": 0.3949885845184326, | |
| "num_input_tokens_seen": 78094440, | |
| "step": 765, | |
| "train_runtime": 10611.005, | |
| "train_tokens_per_second": 7359.759 | |
| }, | |
| { | |
| "epoch": 0.6632213608957795, | |
| "grad_norm": 0.6249157337085598, | |
| "learning_rate": 7.529235198329723e-06, | |
| "loss": 0.3596529006958008, | |
| "num_input_tokens_seen": 78635856, | |
| "step": 770, | |
| "train_runtime": 10691.6063, | |
| "train_tokens_per_second": 7354.915 | |
| }, | |
| { | |
| "epoch": 0.6675279931093885, | |
| "grad_norm": 0.6023145192971817, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.35566143989562987, | |
| "num_input_tokens_seen": 79138600, | |
| "step": 775, | |
| "train_runtime": 10756.7977, | |
| "train_tokens_per_second": 7357.078 | |
| }, | |
| { | |
| "epoch": 0.6718346253229974, | |
| "grad_norm": 0.6593097675747426, | |
| "learning_rate": 7.470650394386274e-06, | |
| "loss": 0.36934409141540525, | |
| "num_input_tokens_seen": 79642424, | |
| "step": 780, | |
| "train_runtime": 10825.3842, | |
| "train_tokens_per_second": 7357.007 | |
| }, | |
| { | |
| "epoch": 0.6761412575366064, | |
| "grad_norm": 0.7137202399672637, | |
| "learning_rate": 7.441187724612007e-06, | |
| "loss": 0.37495574951171873, | |
| "num_input_tokens_seen": 80152744, | |
| "step": 785, | |
| "train_runtime": 10890.6152, | |
| "train_tokens_per_second": 7359.8 | |
| }, | |
| { | |
| "epoch": 0.6804478897502153, | |
| "grad_norm": 0.605977280910932, | |
| "learning_rate": 7.4116133389748115e-06, | |
| "loss": 0.37909436225891113, | |
| "num_input_tokens_seen": 80693400, | |
| "step": 790, | |
| "train_runtime": 10944.0447, | |
| "train_tokens_per_second": 7373.27 | |
| }, | |
| { | |
| "epoch": 0.6847545219638242, | |
| "grad_norm": 0.7982064055258384, | |
| "learning_rate": 7.381928590884741e-06, | |
| "loss": 0.361489462852478, | |
| "num_input_tokens_seen": 81180032, | |
| "step": 795, | |
| "train_runtime": 11009.7965, | |
| "train_tokens_per_second": 7373.436 | |
| }, | |
| { | |
| "epoch": 0.6890611541774333, | |
| "grad_norm": 0.560346275445483, | |
| "learning_rate": 7.35213483880236e-06, | |
| "loss": 0.34208495616912843, | |
| "num_input_tokens_seen": 81715960, | |
| "step": 800, | |
| "train_runtime": 11082.868, | |
| "train_tokens_per_second": 7373.178 | |
| }, | |
| { | |
| "epoch": 0.6933677863910422, | |
| "grad_norm": 0.7576401916556533, | |
| "learning_rate": 7.322233446176571e-06, | |
| "loss": 0.3672114133834839, | |
| "num_input_tokens_seen": 82195808, | |
| "step": 805, | |
| "train_runtime": 11153.6467, | |
| "train_tokens_per_second": 7369.411 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 0.6525903599941298, | |
| "learning_rate": 7.29222578138222e-06, | |
| "loss": 0.3744704008102417, | |
| "num_input_tokens_seen": 82680488, | |
| "step": 810, | |
| "train_runtime": 11221.591, | |
| "train_tokens_per_second": 7367.983 | |
| }, | |
| { | |
| "epoch": 0.7019810508182601, | |
| "grad_norm": 0.5908244133789757, | |
| "learning_rate": 7.2621132176574774e-06, | |
| "loss": 0.373249888420105, | |
| "num_input_tokens_seen": 83207016, | |
| "step": 815, | |
| "train_runtime": 11292.0868, | |
| "train_tokens_per_second": 7368.613 | |
| }, | |
| { | |
| "epoch": 0.7062876830318691, | |
| "grad_norm": 0.8900954684782045, | |
| "learning_rate": 7.231897133040997e-06, | |
| "loss": 0.34656834602355957, | |
| "num_input_tokens_seen": 83705464, | |
| "step": 820, | |
| "train_runtime": 11366.0296, | |
| "train_tokens_per_second": 7364.53 | |
| }, | |
| { | |
| "epoch": 0.710594315245478, | |
| "grad_norm": 0.6599764009158128, | |
| "learning_rate": 7.201578910308848e-06, | |
| "loss": 0.36565725803375243, | |
| "num_input_tokens_seen": 84223624, | |
| "step": 825, | |
| "train_runtime": 11439.0205, | |
| "train_tokens_per_second": 7362.835 | |
| }, | |
| { | |
| "epoch": 0.7149009474590869, | |
| "grad_norm": 0.7671400370894249, | |
| "learning_rate": 7.1711599369112385e-06, | |
| "loss": 0.3640167474746704, | |
| "num_input_tokens_seen": 84733008, | |
| "step": 830, | |
| "train_runtime": 11498.286, | |
| "train_tokens_per_second": 7369.186 | |
| }, | |
| { | |
| "epoch": 0.719207579672696, | |
| "grad_norm": 0.6890917187634811, | |
| "learning_rate": 7.14064160490902e-06, | |
| "loss": 0.34748039245605467, | |
| "num_input_tokens_seen": 85244448, | |
| "step": 835, | |
| "train_runtime": 11570.0337, | |
| "train_tokens_per_second": 7367.692 | |
| }, | |
| { | |
| "epoch": 0.7235142118863049, | |
| "grad_norm": 0.5766158583478934, | |
| "learning_rate": 7.110025310909981e-06, | |
| "loss": 0.353003716468811, | |
| "num_input_tokens_seen": 85762344, | |
| "step": 840, | |
| "train_runtime": 11638.7238, | |
| "train_tokens_per_second": 7368.707 | |
| }, | |
| { | |
| "epoch": 0.7278208440999139, | |
| "grad_norm": 0.5823984856746932, | |
| "learning_rate": 7.079312456004941e-06, | |
| "loss": 0.37342329025268556, | |
| "num_input_tokens_seen": 86289224, | |
| "step": 845, | |
| "train_runtime": 11716.1517, | |
| "train_tokens_per_second": 7364.98 | |
| }, | |
| { | |
| "epoch": 0.7321274763135228, | |
| "grad_norm": 0.5927480726961869, | |
| "learning_rate": 7.048504445703623e-06, | |
| "loss": 0.3299787759780884, | |
| "num_input_tokens_seen": 86796928, | |
| "step": 850, | |
| "train_runtime": 11785.4668, | |
| "train_tokens_per_second": 7364.742 | |
| }, | |
| { | |
| "epoch": 0.7364341085271318, | |
| "grad_norm": 0.6321864557945756, | |
| "learning_rate": 7.017602689870345e-06, | |
| "loss": 0.36838767528533933, | |
| "num_input_tokens_seen": 87320496, | |
| "step": 855, | |
| "train_runtime": 11850.93, | |
| "train_tokens_per_second": 7368.24 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 0.6018531234880375, | |
| "learning_rate": 6.986608602659486e-06, | |
| "loss": 0.3541435718536377, | |
| "num_input_tokens_seen": 87879104, | |
| "step": 860, | |
| "train_runtime": 11917.2964, | |
| "train_tokens_per_second": 7374.081 | |
| }, | |
| { | |
| "epoch": 0.7450473729543498, | |
| "grad_norm": 0.6193130746098626, | |
| "learning_rate": 6.95552360245078e-06, | |
| "loss": 0.35468919277191163, | |
| "num_input_tokens_seen": 88373816, | |
| "step": 865, | |
| "train_runtime": 11986.0817, | |
| "train_tokens_per_second": 7373.036 | |
| }, | |
| { | |
| "epoch": 0.7493540051679587, | |
| "grad_norm": 0.5838704103103464, | |
| "learning_rate": 6.92434911178441e-06, | |
| "loss": 0.36195812225341795, | |
| "num_input_tokens_seen": 88916672, | |
| "step": 870, | |
| "train_runtime": 12056.0128, | |
| "train_tokens_per_second": 7375.297 | |
| }, | |
| { | |
| "epoch": 0.7536606373815676, | |
| "grad_norm": 0.6583966223351327, | |
| "learning_rate": 6.893086557295896e-06, | |
| "loss": 0.3709383726119995, | |
| "num_input_tokens_seen": 89428224, | |
| "step": 875, | |
| "train_runtime": 12120.5201, | |
| "train_tokens_per_second": 7378.25 | |
| }, | |
| { | |
| "epoch": 0.7579672695951766, | |
| "grad_norm": 0.579643853477019, | |
| "learning_rate": 6.861737369650818e-06, | |
| "loss": 0.362162446975708, | |
| "num_input_tokens_seen": 89924488, | |
| "step": 880, | |
| "train_runtime": 12192.2773, | |
| "train_tokens_per_second": 7375.528 | |
| }, | |
| { | |
| "epoch": 0.7622739018087855, | |
| "grad_norm": 0.5904164496483095, | |
| "learning_rate": 6.830302983479344e-06, | |
| "loss": 0.36125888824462893, | |
| "num_input_tokens_seen": 90470424, | |
| "step": 885, | |
| "train_runtime": 12260.5076, | |
| "train_tokens_per_second": 7379.011 | |
| }, | |
| { | |
| "epoch": 0.7665805340223945, | |
| "grad_norm": 0.6357648372036675, | |
| "learning_rate": 6.7987848373105705e-06, | |
| "loss": 0.35643205642700193, | |
| "num_input_tokens_seen": 90996680, | |
| "step": 890, | |
| "train_runtime": 12336.8599, | |
| "train_tokens_per_second": 7376.0 | |
| }, | |
| { | |
| "epoch": 0.7708871662360034, | |
| "grad_norm": 0.6943191292916018, | |
| "learning_rate": 6.767184373506698e-06, | |
| "loss": 0.34829187393188477, | |
| "num_input_tokens_seen": 91480544, | |
| "step": 895, | |
| "train_runtime": 12387.5001, | |
| "train_tokens_per_second": 7384.908 | |
| }, | |
| { | |
| "epoch": 0.7751937984496124, | |
| "grad_norm": 0.7220194168057159, | |
| "learning_rate": 6.73550303819702e-06, | |
| "loss": 0.3527660846710205, | |
| "num_input_tokens_seen": 91969264, | |
| "step": 900, | |
| "train_runtime": 12458.5492, | |
| "train_tokens_per_second": 7382.02 | |
| }, | |
| { | |
| "epoch": 0.7795004306632214, | |
| "grad_norm": 0.7375983296510873, | |
| "learning_rate": 6.70374228121175e-06, | |
| "loss": 0.35948958396911623, | |
| "num_input_tokens_seen": 92456304, | |
| "step": 905, | |
| "train_runtime": 12529.4987, | |
| "train_tokens_per_second": 7379.09 | |
| }, | |
| { | |
| "epoch": 0.7838070628768303, | |
| "grad_norm": 0.6425550494971753, | |
| "learning_rate": 6.671903556015664e-06, | |
| "loss": 0.35416080951690676, | |
| "num_input_tokens_seen": 92982984, | |
| "step": 910, | |
| "train_runtime": 12602.0994, | |
| "train_tokens_per_second": 7378.373 | |
| }, | |
| { | |
| "epoch": 0.7881136950904393, | |
| "grad_norm": 0.6001020126847847, | |
| "learning_rate": 6.639988319641592e-06, | |
| "loss": 0.34634552001953123, | |
| "num_input_tokens_seen": 93461984, | |
| "step": 915, | |
| "train_runtime": 12671.0709, | |
| "train_tokens_per_second": 7376.013 | |
| }, | |
| { | |
| "epoch": 0.7924203273040482, | |
| "grad_norm": 0.6439880804882693, | |
| "learning_rate": 6.607998032623741e-06, | |
| "loss": 0.37337236404418944, | |
| "num_input_tokens_seen": 93979864, | |
| "step": 920, | |
| "train_runtime": 12735.015, | |
| "train_tokens_per_second": 7379.643 | |
| }, | |
| { | |
| "epoch": 0.7967269595176572, | |
| "grad_norm": 0.6979094022106086, | |
| "learning_rate": 6.57593415893085e-06, | |
| "loss": 0.3476158380508423, | |
| "num_input_tokens_seen": 94462232, | |
| "step": 925, | |
| "train_runtime": 12801.7577, | |
| "train_tokens_per_second": 7378.849 | |
| }, | |
| { | |
| "epoch": 0.8010335917312662, | |
| "grad_norm": 0.765921272529139, | |
| "learning_rate": 6.5437981658992e-06, | |
| "loss": 0.35996217727661134, | |
| "num_input_tokens_seen": 94992816, | |
| "step": 930, | |
| "train_runtime": 12865.11, | |
| "train_tokens_per_second": 7383.755 | |
| }, | |
| { | |
| "epoch": 0.8053402239448751, | |
| "grad_norm": 0.599395651309773, | |
| "learning_rate": 6.511591524165465e-06, | |
| "loss": 0.36520819664001464, | |
| "num_input_tokens_seen": 95488328, | |
| "step": 935, | |
| "train_runtime": 12937.4041, | |
| "train_tokens_per_second": 7380.795 | |
| }, | |
| { | |
| "epoch": 0.8096468561584841, | |
| "grad_norm": 0.6451964755773082, | |
| "learning_rate": 6.479315707599407e-06, | |
| "loss": 0.36473860740661623, | |
| "num_input_tokens_seen": 95954480, | |
| "step": 940, | |
| "train_runtime": 12996.4069, | |
| "train_tokens_per_second": 7383.155 | |
| }, | |
| { | |
| "epoch": 0.813953488372093, | |
| "grad_norm": 0.6050461893812418, | |
| "learning_rate": 6.446972193236433e-06, | |
| "loss": 0.3522223472595215, | |
| "num_input_tokens_seen": 96490896, | |
| "step": 945, | |
| "train_runtime": 13056.1311, | |
| "train_tokens_per_second": 7390.466 | |
| }, | |
| { | |
| "epoch": 0.818260120585702, | |
| "grad_norm": 0.6648024183404038, | |
| "learning_rate": 6.414562461209994e-06, | |
| "loss": 0.3675665855407715, | |
| "num_input_tokens_seen": 97032496, | |
| "step": 950, | |
| "train_runtime": 13126.6476, | |
| "train_tokens_per_second": 7392.024 | |
| }, | |
| { | |
| "epoch": 0.8225667527993109, | |
| "grad_norm": 0.6311611856518416, | |
| "learning_rate": 6.3820879946838585e-06, | |
| "loss": 0.3679534435272217, | |
| "num_input_tokens_seen": 97543080, | |
| "step": 955, | |
| "train_runtime": 13201.9771, | |
| "train_tokens_per_second": 7388.521 | |
| }, | |
| { | |
| "epoch": 0.8268733850129198, | |
| "grad_norm": 0.7116714532190532, | |
| "learning_rate": 6.349550279784231e-06, | |
| "loss": 0.3499560594558716, | |
| "num_input_tokens_seen": 98020152, | |
| "step": 960, | |
| "train_runtime": 13274.0305, | |
| "train_tokens_per_second": 7384.355 | |
| }, | |
| { | |
| "epoch": 0.8311800172265289, | |
| "grad_norm": 0.615142461235978, | |
| "learning_rate": 6.316950805531746e-06, | |
| "loss": 0.3565502166748047, | |
| "num_input_tokens_seen": 98498816, | |
| "step": 965, | |
| "train_runtime": 13343.5463, | |
| "train_tokens_per_second": 7381.757 | |
| }, | |
| { | |
| "epoch": 0.8354866494401378, | |
| "grad_norm": 0.6531050926414822, | |
| "learning_rate": 6.284291063773331e-06, | |
| "loss": 0.36150004863739016, | |
| "num_input_tokens_seen": 99045784, | |
| "step": 970, | |
| "train_runtime": 13419.4835, | |
| "train_tokens_per_second": 7380.745 | |
| }, | |
| { | |
| "epoch": 0.8397932816537468, | |
| "grad_norm": 0.6368022854985766, | |
| "learning_rate": 6.251572549113925e-06, | |
| "loss": 0.36032235622406006, | |
| "num_input_tokens_seen": 99579432, | |
| "step": 975, | |
| "train_runtime": 13487.1876, | |
| "train_tokens_per_second": 7383.261 | |
| }, | |
| { | |
| "epoch": 0.8440999138673557, | |
| "grad_norm": 0.6146788889706721, | |
| "learning_rate": 6.218796758848085e-06, | |
| "loss": 0.3623358726501465, | |
| "num_input_tokens_seen": 100089992, | |
| "step": 980, | |
| "train_runtime": 13556.551, | |
| "train_tokens_per_second": 7383.146 | |
| }, | |
| { | |
| "epoch": 0.8484065460809647, | |
| "grad_norm": 0.6116609253098809, | |
| "learning_rate": 6.185965192891472e-06, | |
| "loss": 0.36282594203948976, | |
| "num_input_tokens_seen": 100618600, | |
| "step": 985, | |
| "train_runtime": 13614.4768, | |
| "train_tokens_per_second": 7390.559 | |
| }, | |
| { | |
| "epoch": 0.8527131782945736, | |
| "grad_norm": 0.6846681338486661, | |
| "learning_rate": 6.153079353712201e-06, | |
| "loss": 0.3642581939697266, | |
| "num_input_tokens_seen": 101111776, | |
| "step": 990, | |
| "train_runtime": 13677.4451, | |
| "train_tokens_per_second": 7392.592 | |
| }, | |
| { | |
| "epoch": 0.8570198105081827, | |
| "grad_norm": 0.6305569102019681, | |
| "learning_rate": 6.120140746262091e-06, | |
| "loss": 0.3563115119934082, | |
| "num_input_tokens_seen": 101603528, | |
| "step": 995, | |
| "train_runtime": 13750.1142, | |
| "train_tokens_per_second": 7389.286 | |
| }, | |
| { | |
| "epoch": 0.8613264427217916, | |
| "grad_norm": 0.6356575993962907, | |
| "learning_rate": 6.087150877907786e-06, | |
| "loss": 0.3616267442703247, | |
| "num_input_tokens_seen": 102147912, | |
| "step": 1000, | |
| "train_runtime": 13817.3271, | |
| "train_tokens_per_second": 7392.74 | |
| }, | |
| { | |
| "epoch": 0.8656330749354005, | |
| "grad_norm": 0.7059213261617521, | |
| "learning_rate": 6.054111258361782e-06, | |
| "loss": 0.3602945804595947, | |
| "num_input_tokens_seen": 102659472, | |
| "step": 1005, | |
| "train_runtime": 13880.5359, | |
| "train_tokens_per_second": 7395.93 | |
| }, | |
| { | |
| "epoch": 0.8699397071490095, | |
| "grad_norm": 0.6411294797248545, | |
| "learning_rate": 6.021023399613335e-06, | |
| "loss": 0.34914188385009765, | |
| "num_input_tokens_seen": 103179440, | |
| "step": 1010, | |
| "train_runtime": 13951.8001, | |
| "train_tokens_per_second": 7395.421 | |
| }, | |
| { | |
| "epoch": 0.8742463393626184, | |
| "grad_norm": 0.6072534268937941, | |
| "learning_rate": 5.987888815859266e-06, | |
| "loss": 0.3555158138275146, | |
| "num_input_tokens_seen": 103690584, | |
| "step": 1015, | |
| "train_runtime": 14022.5116, | |
| "train_tokens_per_second": 7394.58 | |
| }, | |
| { | |
| "epoch": 0.8785529715762274, | |
| "grad_norm": 0.6179975691240837, | |
| "learning_rate": 5.954709023434669e-06, | |
| "loss": 0.33635711669921875, | |
| "num_input_tokens_seen": 104178112, | |
| "step": 1020, | |
| "train_runtime": 14085.2268, | |
| "train_tokens_per_second": 7396.268 | |
| }, | |
| { | |
| "epoch": 0.8828596037898363, | |
| "grad_norm": 0.6614614352560472, | |
| "learning_rate": 5.921485540743516e-06, | |
| "loss": 0.35177910327911377, | |
| "num_input_tokens_seen": 104699576, | |
| "step": 1025, | |
| "train_runtime": 14160.14, | |
| "train_tokens_per_second": 7393.965 | |
| }, | |
| { | |
| "epoch": 0.8871662360034454, | |
| "grad_norm": 0.64245424991513, | |
| "learning_rate": 5.888219888189176e-06, | |
| "loss": 0.36098456382751465, | |
| "num_input_tokens_seen": 105214336, | |
| "step": 1030, | |
| "train_runtime": 14213.0669, | |
| "train_tokens_per_second": 7402.648 | |
| }, | |
| { | |
| "epoch": 0.8914728682170543, | |
| "grad_norm": 0.6765874092986255, | |
| "learning_rate": 5.854913588104832e-06, | |
| "loss": 0.37163600921630857, | |
| "num_input_tokens_seen": 105712752, | |
| "step": 1035, | |
| "train_runtime": 14273.3512, | |
| "train_tokens_per_second": 7406.302 | |
| }, | |
| { | |
| "epoch": 0.8957795004306632, | |
| "grad_norm": 0.6635023467337341, | |
| "learning_rate": 5.82156816468382e-06, | |
| "loss": 0.37590489387512205, | |
| "num_input_tokens_seen": 106215336, | |
| "step": 1040, | |
| "train_runtime": 14341.6979, | |
| "train_tokens_per_second": 7406.05 | |
| }, | |
| { | |
| "epoch": 0.9000861326442722, | |
| "grad_norm": 0.6210568645525162, | |
| "learning_rate": 5.788185143909868e-06, | |
| "loss": 0.35026397705078127, | |
| "num_input_tokens_seen": 106737504, | |
| "step": 1045, | |
| "train_runtime": 14414.7002, | |
| "train_tokens_per_second": 7404.768 | |
| }, | |
| { | |
| "epoch": 0.9043927648578811, | |
| "grad_norm": 0.6775230384008981, | |
| "learning_rate": 5.754766053487274e-06, | |
| "loss": 0.3652517080307007, | |
| "num_input_tokens_seen": 107261064, | |
| "step": 1050, | |
| "train_runtime": 14484.8725, | |
| "train_tokens_per_second": 7405.04 | |
| }, | |
| { | |
| "epoch": 0.9086993970714901, | |
| "grad_norm": 0.6034355872415312, | |
| "learning_rate": 5.721312422770984e-06, | |
| "loss": 0.3495129108428955, | |
| "num_input_tokens_seen": 107787160, | |
| "step": 1055, | |
| "train_runtime": 14564.529, | |
| "train_tokens_per_second": 7400.662 | |
| }, | |
| { | |
| "epoch": 0.9130060292850991, | |
| "grad_norm": 0.6266482958161476, | |
| "learning_rate": 5.68782578269661e-06, | |
| "loss": 0.34899396896362306, | |
| "num_input_tokens_seen": 108278376, | |
| "step": 1060, | |
| "train_runtime": 14637.9348, | |
| "train_tokens_per_second": 7397.107 | |
| }, | |
| { | |
| "epoch": 0.917312661498708, | |
| "grad_norm": 0.5978323602685123, | |
| "learning_rate": 5.6543076657103705e-06, | |
| "loss": 0.35709681510925295, | |
| "num_input_tokens_seen": 108741080, | |
| "step": 1065, | |
| "train_runtime": 14703.9074, | |
| "train_tokens_per_second": 7395.387 | |
| }, | |
| { | |
| "epoch": 0.921619293712317, | |
| "grad_norm": 0.5791503709903075, | |
| "learning_rate": 5.620759605698953e-06, | |
| "loss": 0.35440216064453123, | |
| "num_input_tokens_seen": 109252016, | |
| "step": 1070, | |
| "train_runtime": 14783.6598, | |
| "train_tokens_per_second": 7390.052 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 0.7142563752017141, | |
| "learning_rate": 5.587183137919332e-06, | |
| "loss": 0.3724498748779297, | |
| "num_input_tokens_seen": 109763232, | |
| "step": 1075, | |
| "train_runtime": 14858.0697, | |
| "train_tokens_per_second": 7387.449 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 0.609481394596669, | |
| "learning_rate": 5.553579798928503e-06, | |
| "loss": 0.35597658157348633, | |
| "num_input_tokens_seen": 110258264, | |
| "step": 1080, | |
| "train_runtime": 14920.315, | |
| "train_tokens_per_second": 7389.808 | |
| }, | |
| { | |
| "epoch": 0.9345391903531438, | |
| "grad_norm": 0.6901948885966874, | |
| "learning_rate": 5.519951126513164e-06, | |
| "loss": 0.366562032699585, | |
| "num_input_tokens_seen": 110761536, | |
| "step": 1085, | |
| "train_runtime": 14989.6087, | |
| "train_tokens_per_second": 7389.221 | |
| }, | |
| { | |
| "epoch": 0.9388458225667528, | |
| "grad_norm": 0.6917267032383838, | |
| "learning_rate": 5.486298659619346e-06, | |
| "loss": 0.3656296730041504, | |
| "num_input_tokens_seen": 111228400, | |
| "step": 1090, | |
| "train_runtime": 15050.3018, | |
| "train_tokens_per_second": 7390.443 | |
| }, | |
| { | |
| "epoch": 0.9431524547803618, | |
| "grad_norm": 0.6752126310495482, | |
| "learning_rate": 5.452623938281983e-06, | |
| "loss": 0.3466034412384033, | |
| "num_input_tokens_seen": 111733504, | |
| "step": 1095, | |
| "train_runtime": 15125.111, | |
| "train_tokens_per_second": 7387.285 | |
| }, | |
| { | |
| "epoch": 0.9474590869939707, | |
| "grad_norm": 0.6713559565663015, | |
| "learning_rate": 5.418928503554436e-06, | |
| "loss": 0.36214759349823, | |
| "num_input_tokens_seen": 112225696, | |
| "step": 1100, | |
| "train_runtime": 15193.5378, | |
| "train_tokens_per_second": 7386.41 | |
| }, | |
| { | |
| "epoch": 0.9517657192075797, | |
| "grad_norm": 0.632679462363844, | |
| "learning_rate": 5.385213897437975e-06, | |
| "loss": 0.3605997085571289, | |
| "num_input_tokens_seen": 112749656, | |
| "step": 1105, | |
| "train_runtime": 15252.8865, | |
| "train_tokens_per_second": 7392.021 | |
| }, | |
| { | |
| "epoch": 0.9560723514211886, | |
| "grad_norm": 0.6951338495408681, | |
| "learning_rate": 5.351481662811208e-06, | |
| "loss": 0.3977372407913208, | |
| "num_input_tokens_seen": 113307680, | |
| "step": 1110, | |
| "train_runtime": 15315.8148, | |
| "train_tokens_per_second": 7398.084 | |
| }, | |
| { | |
| "epoch": 0.9603789836347976, | |
| "grad_norm": 0.7046520621915809, | |
| "learning_rate": 5.3177333433594734e-06, | |
| "loss": 0.3589299201965332, | |
| "num_input_tokens_seen": 113816672, | |
| "step": 1115, | |
| "train_runtime": 15386.2096, | |
| "train_tokens_per_second": 7397.317 | |
| }, | |
| { | |
| "epoch": 0.9646856158484065, | |
| "grad_norm": 0.6263869344134583, | |
| "learning_rate": 5.283970483504198e-06, | |
| "loss": 0.35573978424072267, | |
| "num_input_tokens_seen": 114328160, | |
| "step": 1120, | |
| "train_runtime": 15460.4356, | |
| "train_tokens_per_second": 7394.886 | |
| }, | |
| { | |
| "epoch": 0.9689922480620154, | |
| "grad_norm": 0.5974542867890239, | |
| "learning_rate": 5.2501946283322204e-06, | |
| "loss": 0.3410700798034668, | |
| "num_input_tokens_seen": 114790904, | |
| "step": 1125, | |
| "train_runtime": 15522.3975, | |
| "train_tokens_per_second": 7395.179 | |
| }, | |
| { | |
| "epoch": 0.9732988802756245, | |
| "grad_norm": 0.7266994361124255, | |
| "learning_rate": 5.216407323525083e-06, | |
| "loss": 0.3575610876083374, | |
| "num_input_tokens_seen": 115286400, | |
| "step": 1130, | |
| "train_runtime": 15589.5728, | |
| "train_tokens_per_second": 7395.097 | |
| }, | |
| { | |
| "epoch": 0.9776055124892334, | |
| "grad_norm": 0.6603430551607196, | |
| "learning_rate": 5.182610115288296e-06, | |
| "loss": 0.35471277236938475, | |
| "num_input_tokens_seen": 115819512, | |
| "step": 1135, | |
| "train_runtime": 15664.1723, | |
| "train_tokens_per_second": 7393.912 | |
| }, | |
| { | |
| "epoch": 0.9819121447028424, | |
| "grad_norm": 0.6122815306318921, | |
| "learning_rate": 5.148804550280578e-06, | |
| "loss": 0.35717573165893557, | |
| "num_input_tokens_seen": 116305168, | |
| "step": 1140, | |
| "train_runtime": 15727.3256, | |
| "train_tokens_per_second": 7395.101 | |
| }, | |
| { | |
| "epoch": 0.9862187769164513, | |
| "grad_norm": 0.6081458803821285, | |
| "learning_rate": 5.114992175543084e-06, | |
| "loss": 0.346773099899292, | |
| "num_input_tokens_seen": 116753064, | |
| "step": 1145, | |
| "train_runtime": 15789.1485, | |
| "train_tokens_per_second": 7394.513 | |
| }, | |
| { | |
| "epoch": 0.9905254091300603, | |
| "grad_norm": 0.6199305616103742, | |
| "learning_rate": 5.081174538428596e-06, | |
| "loss": 0.36169114112854006, | |
| "num_input_tokens_seen": 117272904, | |
| "step": 1150, | |
| "train_runtime": 15859.7259, | |
| "train_tokens_per_second": 7394.384 | |
| }, | |
| { | |
| "epoch": 0.9948320413436692, | |
| "grad_norm": 0.5967979270790722, | |
| "learning_rate": 5.047353186530718e-06, | |
| "loss": 0.34629039764404296, | |
| "num_input_tokens_seen": 117758136, | |
| "step": 1155, | |
| "train_runtime": 15925.2581, | |
| "train_tokens_per_second": 7394.426 | |
| }, | |
| { | |
| "epoch": 0.9991386735572783, | |
| "grad_norm": 0.7520097407257402, | |
| "learning_rate": 5.0135296676130576e-06, | |
| "loss": 0.35693745613098143, | |
| "num_input_tokens_seen": 118275664, | |
| "step": 1160, | |
| "train_runtime": 15997.7061, | |
| "train_tokens_per_second": 7393.289 | |
| }, | |
| { | |
| "epoch": 1.0034453057708872, | |
| "grad_norm": 0.6519790289096927, | |
| "learning_rate": 4.979705529538385e-06, | |
| "loss": 0.3211682796478271, | |
| "num_input_tokens_seen": 118772088, | |
| "step": 1165, | |
| "train_runtime": 16067.268, | |
| "train_tokens_per_second": 7392.177 | |
| }, | |
| { | |
| "epoch": 1.0077519379844961, | |
| "grad_norm": 0.7176780843602725, | |
| "learning_rate": 4.9458823201978124e-06, | |
| "loss": 0.3132964611053467, | |
| "num_input_tokens_seen": 119324520, | |
| "step": 1170, | |
| "train_runtime": 16146.0511, | |
| "train_tokens_per_second": 7390.322 | |
| }, | |
| { | |
| "epoch": 1.012058570198105, | |
| "grad_norm": 0.7294575856519293, | |
| "learning_rate": 4.912061587439944e-06, | |
| "loss": 0.32307000160217286, | |
| "num_input_tokens_seen": 119862352, | |
| "step": 1175, | |
| "train_runtime": 16220.7374, | |
| "train_tokens_per_second": 7389.451 | |
| }, | |
| { | |
| "epoch": 1.016365202411714, | |
| "grad_norm": 0.6486717204872143, | |
| "learning_rate": 4.8782448790000525e-06, | |
| "loss": 0.3019747257232666, | |
| "num_input_tokens_seen": 120353472, | |
| "step": 1180, | |
| "train_runtime": 16288.6517, | |
| "train_tokens_per_second": 7388.793 | |
| }, | |
| { | |
| "epoch": 1.020671834625323, | |
| "grad_norm": 0.7028559369295986, | |
| "learning_rate": 4.8444337424292445e-06, | |
| "loss": 0.3043541669845581, | |
| "num_input_tokens_seen": 120865584, | |
| "step": 1185, | |
| "train_runtime": 16369.8645, | |
| "train_tokens_per_second": 7383.42 | |
| }, | |
| { | |
| "epoch": 1.024978466838932, | |
| "grad_norm": 0.7560138997334946, | |
| "learning_rate": 4.81062972502364e-06, | |
| "loss": 0.32289729118347166, | |
| "num_input_tokens_seen": 121382112, | |
| "step": 1190, | |
| "train_runtime": 16445.2618, | |
| "train_tokens_per_second": 7380.978 | |
| }, | |
| { | |
| "epoch": 1.0292850990525408, | |
| "grad_norm": 0.6855270470811776, | |
| "learning_rate": 4.7768343737535694e-06, | |
| "loss": 0.3106250524520874, | |
| "num_input_tokens_seen": 121946272, | |
| "step": 1195, | |
| "train_runtime": 16520.8552, | |
| "train_tokens_per_second": 7381.353 | |
| }, | |
| { | |
| "epoch": 1.0335917312661498, | |
| "grad_norm": 0.5866792589666393, | |
| "learning_rate": 4.743049235192771e-06, | |
| "loss": 0.30351016521453855, | |
| "num_input_tokens_seen": 122456216, | |
| "step": 1200, | |
| "train_runtime": 16592.4847, | |
| "train_tokens_per_second": 7380.222 | |
| }, | |
| { | |
| "epoch": 1.0378983634797587, | |
| "grad_norm": 0.5895059778955265, | |
| "learning_rate": 4.7092758554476215e-06, | |
| "loss": 0.30516183376312256, | |
| "num_input_tokens_seen": 122997032, | |
| "step": 1205, | |
| "train_runtime": 16662.8806, | |
| "train_tokens_per_second": 7381.499 | |
| }, | |
| { | |
| "epoch": 1.0422049956933679, | |
| "grad_norm": 0.7393826499994867, | |
| "learning_rate": 4.6755157800863826e-06, | |
| "loss": 0.30727853775024416, | |
| "num_input_tokens_seen": 123504248, | |
| "step": 1210, | |
| "train_runtime": 16733.5532, | |
| "train_tokens_per_second": 7380.635 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 0.657650517709326, | |
| "learning_rate": 4.641770554068465e-06, | |
| "loss": 0.2971274137496948, | |
| "num_input_tokens_seen": 124021952, | |
| "step": 1215, | |
| "train_runtime": 16798.7638, | |
| "train_tokens_per_second": 7382.802 | |
| }, | |
| { | |
| "epoch": 1.0508182601205858, | |
| "grad_norm": 0.6783125678934093, | |
| "learning_rate": 4.608041721673731e-06, | |
| "loss": 0.31333255767822266, | |
| "num_input_tokens_seen": 124538536, | |
| "step": 1220, | |
| "train_runtime": 16866.0122, | |
| "train_tokens_per_second": 7383.994 | |
| }, | |
| { | |
| "epoch": 1.0551248923341947, | |
| "grad_norm": 0.68062001198956, | |
| "learning_rate": 4.574330826431822e-06, | |
| "loss": 0.33772854804992675, | |
| "num_input_tokens_seen": 125033152, | |
| "step": 1225, | |
| "train_runtime": 16937.5057, | |
| "train_tokens_per_second": 7382.029 | |
| }, | |
| { | |
| "epoch": 1.0594315245478036, | |
| "grad_norm": 0.668586343087902, | |
| "learning_rate": 4.540639411051528e-06, | |
| "loss": 0.3157934665679932, | |
| "num_input_tokens_seen": 125580048, | |
| "step": 1230, | |
| "train_runtime": 17000.2723, | |
| "train_tokens_per_second": 7386.943 | |
| }, | |
| { | |
| "epoch": 1.0637381567614126, | |
| "grad_norm": 0.5699620351188704, | |
| "learning_rate": 4.506969017350178e-06, | |
| "loss": 0.304073166847229, | |
| "num_input_tokens_seen": 126082048, | |
| "step": 1235, | |
| "train_runtime": 17068.426, | |
| "train_tokens_per_second": 7386.859 | |
| }, | |
| { | |
| "epoch": 1.0680447889750215, | |
| "grad_norm": 0.6139561544523536, | |
| "learning_rate": 4.473321186183091e-06, | |
| "loss": 0.29829227924346924, | |
| "num_input_tokens_seen": 126635088, | |
| "step": 1240, | |
| "train_runtime": 17133.6905, | |
| "train_tokens_per_second": 7390.999 | |
| }, | |
| { | |
| "epoch": 1.0723514211886305, | |
| "grad_norm": 0.6987426072266698, | |
| "learning_rate": 4.439697457373055e-06, | |
| "loss": 0.2966867685317993, | |
| "num_input_tokens_seen": 127113288, | |
| "step": 1245, | |
| "train_runtime": 17192.6486, | |
| "train_tokens_per_second": 7393.467 | |
| }, | |
| { | |
| "epoch": 1.0766580534022394, | |
| "grad_norm": 0.663571335424868, | |
| "learning_rate": 4.40609936963987e-06, | |
| "loss": 0.30222883224487307, | |
| "num_input_tokens_seen": 127635064, | |
| "step": 1250, | |
| "train_runtime": 17248.1503, | |
| "train_tokens_per_second": 7399.928 | |
| }, | |
| { | |
| "epoch": 1.0809646856158484, | |
| "grad_norm": 0.6443165603778871, | |
| "learning_rate": 4.37252846052992e-06, | |
| "loss": 0.3315183877944946, | |
| "num_input_tokens_seen": 128185744, | |
| "step": 1255, | |
| "train_runtime": 17313.1184, | |
| "train_tokens_per_second": 7403.966 | |
| }, | |
| { | |
| "epoch": 1.0852713178294573, | |
| "grad_norm": 0.668552183385809, | |
| "learning_rate": 4.338986266345819e-06, | |
| "loss": 0.31603388786315917, | |
| "num_input_tokens_seen": 128696104, | |
| "step": 1260, | |
| "train_runtime": 17374.9297, | |
| "train_tokens_per_second": 7407.0 | |
| }, | |
| { | |
| "epoch": 1.0895779500430662, | |
| "grad_norm": 0.6529412865019517, | |
| "learning_rate": 4.305474322076102e-06, | |
| "loss": 0.29094948768615725, | |
| "num_input_tokens_seen": 129191096, | |
| "step": 1265, | |
| "train_runtime": 17443.9341, | |
| "train_tokens_per_second": 7406.076 | |
| }, | |
| { | |
| "epoch": 1.0938845822566754, | |
| "grad_norm": 0.7064731523627773, | |
| "learning_rate": 4.271994161324977e-06, | |
| "loss": 0.3133362293243408, | |
| "num_input_tokens_seen": 129671408, | |
| "step": 1270, | |
| "train_runtime": 17512.3895, | |
| "train_tokens_per_second": 7404.553 | |
| }, | |
| { | |
| "epoch": 1.0981912144702843, | |
| "grad_norm": 0.6668035883228985, | |
| "learning_rate": 4.238547316242149e-06, | |
| "loss": 0.3118987321853638, | |
| "num_input_tokens_seen": 130159968, | |
| "step": 1275, | |
| "train_runtime": 17584.2318, | |
| "train_tokens_per_second": 7402.084 | |
| }, | |
| { | |
| "epoch": 1.1024978466838933, | |
| "grad_norm": 0.6201182381091077, | |
| "learning_rate": 4.205135317452701e-06, | |
| "loss": 0.31302499771118164, | |
| "num_input_tokens_seen": 130697176, | |
| "step": 1280, | |
| "train_runtime": 17654.8587, | |
| "train_tokens_per_second": 7402.901 | |
| }, | |
| { | |
| "epoch": 1.1068044788975022, | |
| "grad_norm": 0.7787157406660087, | |
| "learning_rate": 4.171759693987046e-06, | |
| "loss": 0.3269864797592163, | |
| "num_input_tokens_seen": 131192368, | |
| "step": 1285, | |
| "train_runtime": 17714.8718, | |
| "train_tokens_per_second": 7405.776 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.6164244707522502, | |
| "learning_rate": 4.1384219732109544e-06, | |
| "loss": 0.3242049217224121, | |
| "num_input_tokens_seen": 131711328, | |
| "step": 1290, | |
| "train_runtime": 17782.5318, | |
| "train_tokens_per_second": 7406.782 | |
| }, | |
| { | |
| "epoch": 1.11541774332472, | |
| "grad_norm": 0.6247033321680014, | |
| "learning_rate": 4.105123680755667e-06, | |
| "loss": 0.30824267864227295, | |
| "num_input_tokens_seen": 132247936, | |
| "step": 1295, | |
| "train_runtime": 17859.4257, | |
| "train_tokens_per_second": 7404.938 | |
| }, | |
| { | |
| "epoch": 1.119724375538329, | |
| "grad_norm": 1.0131210118875211, | |
| "learning_rate": 4.071866340448062e-06, | |
| "loss": 0.3098948001861572, | |
| "num_input_tokens_seen": 132771952, | |
| "step": 1300, | |
| "train_runtime": 17921.6225, | |
| "train_tokens_per_second": 7408.478 | |
| }, | |
| { | |
| "epoch": 1.124031007751938, | |
| "grad_norm": 0.6160361056861662, | |
| "learning_rate": 4.03865147424093e-06, | |
| "loss": 0.312363338470459, | |
| "num_input_tokens_seen": 133320696, | |
| "step": 1305, | |
| "train_runtime": 18000.4233, | |
| "train_tokens_per_second": 7406.531 | |
| }, | |
| { | |
| "epoch": 1.128337639965547, | |
| "grad_norm": 0.6955496790672944, | |
| "learning_rate": 4.0054806021433245e-06, | |
| "loss": 0.3138570785522461, | |
| "num_input_tokens_seen": 133811104, | |
| "step": 1310, | |
| "train_runtime": 18063.0877, | |
| "train_tokens_per_second": 7407.986 | |
| }, | |
| { | |
| "epoch": 1.1326442721791559, | |
| "grad_norm": 0.6345676045747632, | |
| "learning_rate": 3.9723552421509975e-06, | |
| "loss": 0.2930002212524414, | |
| "num_input_tokens_seen": 134319512, | |
| "step": 1315, | |
| "train_runtime": 18133.0035, | |
| "train_tokens_per_second": 7407.461 | |
| }, | |
| { | |
| "epoch": 1.1369509043927648, | |
| "grad_norm": 0.6268160133086803, | |
| "learning_rate": 3.939276910176939e-06, | |
| "loss": 0.29963154792785646, | |
| "num_input_tokens_seen": 134854032, | |
| "step": 1320, | |
| "train_runtime": 18213.0846, | |
| "train_tokens_per_second": 7404.239 | |
| }, | |
| { | |
| "epoch": 1.1412575366063737, | |
| "grad_norm": 0.7417964459988887, | |
| "learning_rate": 3.906247119981995e-06, | |
| "loss": 0.31452503204345705, | |
| "num_input_tokens_seen": 135369192, | |
| "step": 1325, | |
| "train_runtime": 18272.018, | |
| "train_tokens_per_second": 7408.552 | |
| }, | |
| { | |
| "epoch": 1.1455641688199827, | |
| "grad_norm": 0.6724546111525185, | |
| "learning_rate": 3.873267383105604e-06, | |
| "loss": 0.3095171213150024, | |
| "num_input_tokens_seen": 135915648, | |
| "step": 1330, | |
| "train_runtime": 18336.7572, | |
| "train_tokens_per_second": 7412.197 | |
| }, | |
| { | |
| "epoch": 1.1498708010335918, | |
| "grad_norm": 0.6708401138495464, | |
| "learning_rate": 3.840339208796611e-06, | |
| "loss": 0.32448587417602537, | |
| "num_input_tokens_seen": 136438696, | |
| "step": 1335, | |
| "train_runtime": 18410.0852, | |
| "train_tokens_per_second": 7411.084 | |
| }, | |
| { | |
| "epoch": 1.1541774332472006, | |
| "grad_norm": 0.6395116916821753, | |
| "learning_rate": 3.8074641039442138e-06, | |
| "loss": 0.3084967613220215, | |
| "num_input_tokens_seen": 136955600, | |
| "step": 1340, | |
| "train_runtime": 18478.1216, | |
| "train_tokens_per_second": 7411.771 | |
| }, | |
| { | |
| "epoch": 1.1584840654608097, | |
| "grad_norm": 0.6729984961066279, | |
| "learning_rate": 3.774643573008995e-06, | |
| "loss": 0.28108384609222414, | |
| "num_input_tokens_seen": 137454648, | |
| "step": 1345, | |
| "train_runtime": 18546.9342, | |
| "train_tokens_per_second": 7411.179 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 0.6670203141700878, | |
| "learning_rate": 3.741879117954077e-06, | |
| "loss": 0.3180356502532959, | |
| "num_input_tokens_seen": 137974504, | |
| "step": 1350, | |
| "train_runtime": 18616.3268, | |
| "train_tokens_per_second": 7411.478 | |
| }, | |
| { | |
| "epoch": 1.1670973298880276, | |
| "grad_norm": 0.6874030328622756, | |
| "learning_rate": 3.709172238176384e-06, | |
| "loss": 0.2953655481338501, | |
| "num_input_tokens_seen": 138476768, | |
| "step": 1355, | |
| "train_runtime": 18680.5568, | |
| "train_tokens_per_second": 7412.882 | |
| }, | |
| { | |
| "epoch": 1.1714039621016366, | |
| "grad_norm": 0.6378347790279159, | |
| "learning_rate": 3.6765244304380323e-06, | |
| "loss": 0.31598858833312987, | |
| "num_input_tokens_seen": 139004040, | |
| "step": 1360, | |
| "train_runtime": 18748.7212, | |
| "train_tokens_per_second": 7414.054 | |
| }, | |
| { | |
| "epoch": 1.1757105943152455, | |
| "grad_norm": 0.6324830387437662, | |
| "learning_rate": 3.643937188797826e-06, | |
| "loss": 0.31764779090881345, | |
| "num_input_tokens_seen": 139527672, | |
| "step": 1365, | |
| "train_runtime": 18813.059, | |
| "train_tokens_per_second": 7416.533 | |
| }, | |
| { | |
| "epoch": 1.1800172265288544, | |
| "grad_norm": 0.6068454280272533, | |
| "learning_rate": 3.611412004542887e-06, | |
| "loss": 0.30363523960113525, | |
| "num_input_tokens_seen": 140051536, | |
| "step": 1370, | |
| "train_runtime": 18882.0382, | |
| "train_tokens_per_second": 7417.183 | |
| }, | |
| { | |
| "epoch": 1.1843238587424634, | |
| "grad_norm": 0.7822567824694513, | |
| "learning_rate": 3.578950366120414e-06, | |
| "loss": 0.3027554750442505, | |
| "num_input_tokens_seen": 140523744, | |
| "step": 1375, | |
| "train_runtime": 18939.4105, | |
| "train_tokens_per_second": 7419.647 | |
| }, | |
| { | |
| "epoch": 1.1886304909560723, | |
| "grad_norm": 0.7142925613608084, | |
| "learning_rate": 3.54655375906956e-06, | |
| "loss": 0.27700581550598147, | |
| "num_input_tokens_seen": 140991200, | |
| "step": 1380, | |
| "train_runtime": 19015.4433, | |
| "train_tokens_per_second": 7414.563 | |
| }, | |
| { | |
| "epoch": 1.1929371231696813, | |
| "grad_norm": 0.736983710804409, | |
| "learning_rate": 3.514223665953455e-06, | |
| "loss": 0.3062134742736816, | |
| "num_input_tokens_seen": 141521152, | |
| "step": 1385, | |
| "train_runtime": 19085.2553, | |
| "train_tokens_per_second": 7415.209 | |
| }, | |
| { | |
| "epoch": 1.1972437553832902, | |
| "grad_norm": 0.6552057330865397, | |
| "learning_rate": 3.481961566291358e-06, | |
| "loss": 0.2952254772186279, | |
| "num_input_tokens_seen": 142052768, | |
| "step": 1390, | |
| "train_runtime": 19153.6423, | |
| "train_tokens_per_second": 7416.489 | |
| }, | |
| { | |
| "epoch": 1.2015503875968991, | |
| "grad_norm": 0.6794659598243468, | |
| "learning_rate": 3.4497689364909483e-06, | |
| "loss": 0.30109152793884275, | |
| "num_input_tokens_seen": 142577656, | |
| "step": 1395, | |
| "train_runtime": 19233.2813, | |
| "train_tokens_per_second": 7413.07 | |
| }, | |
| { | |
| "epoch": 1.2058570198105083, | |
| "grad_norm": 0.5949831747224361, | |
| "learning_rate": 3.417647249780761e-06, | |
| "loss": 0.30313336849212646, | |
| "num_input_tokens_seen": 143091504, | |
| "step": 1400, | |
| "train_runtime": 19289.193, | |
| "train_tokens_per_second": 7418.221 | |
| }, | |
| { | |
| "epoch": 1.210163652024117, | |
| "grad_norm": 0.5687006661433506, | |
| "learning_rate": 3.3855979761427705e-06, | |
| "loss": 0.30561926364898684, | |
| "num_input_tokens_seen": 143579880, | |
| "step": 1405, | |
| "train_runtime": 19352.506, | |
| "train_tokens_per_second": 7419.188 | |
| }, | |
| { | |
| "epoch": 1.2144702842377262, | |
| "grad_norm": 0.5856929019081937, | |
| "learning_rate": 3.3536225822451164e-06, | |
| "loss": 0.3229517459869385, | |
| "num_input_tokens_seen": 144116304, | |
| "step": 1410, | |
| "train_runtime": 19419.2134, | |
| "train_tokens_per_second": 7421.326 | |
| }, | |
| { | |
| "epoch": 1.2187769164513351, | |
| "grad_norm": 0.7023895767304632, | |
| "learning_rate": 3.321722531374988e-06, | |
| "loss": 0.3148001432418823, | |
| "num_input_tokens_seen": 144609456, | |
| "step": 1415, | |
| "train_runtime": 19497.6013, | |
| "train_tokens_per_second": 7416.782 | |
| }, | |
| { | |
| "epoch": 1.223083548664944, | |
| "grad_norm": 0.7334327168318343, | |
| "learning_rate": 3.289899283371657e-06, | |
| "loss": 0.3218817710876465, | |
| "num_input_tokens_seen": 145088128, | |
| "step": 1420, | |
| "train_runtime": 19562.664, | |
| "train_tokens_per_second": 7416.583 | |
| }, | |
| { | |
| "epoch": 1.227390180878553, | |
| "grad_norm": 0.6787852523222232, | |
| "learning_rate": 3.258154294559671e-06, | |
| "loss": 0.3102770566940308, | |
| "num_input_tokens_seen": 145589104, | |
| "step": 1425, | |
| "train_runtime": 19633.9236, | |
| "train_tokens_per_second": 7415.181 | |
| }, | |
| { | |
| "epoch": 1.231696813092162, | |
| "grad_norm": 0.5985537130657856, | |
| "learning_rate": 3.2264890176822137e-06, | |
| "loss": 0.28916454315185547, | |
| "num_input_tokens_seen": 146084280, | |
| "step": 1430, | |
| "train_runtime": 19704.3502, | |
| "train_tokens_per_second": 7413.809 | |
| }, | |
| { | |
| "epoch": 1.2360034453057709, | |
| "grad_norm": 0.6605779036797204, | |
| "learning_rate": 3.194904901834613e-06, | |
| "loss": 0.2934903860092163, | |
| "num_input_tokens_seen": 146581160, | |
| "step": 1435, | |
| "train_runtime": 19766.1983, | |
| "train_tokens_per_second": 7415.749 | |
| }, | |
| { | |
| "epoch": 1.2403100775193798, | |
| "grad_norm": 0.6981230112849379, | |
| "learning_rate": 3.1634033923980336e-06, | |
| "loss": 0.2953122854232788, | |
| "num_input_tokens_seen": 147076232, | |
| "step": 1440, | |
| "train_runtime": 19829.6949, | |
| "train_tokens_per_second": 7416.969 | |
| }, | |
| { | |
| "epoch": 1.2446167097329888, | |
| "grad_norm": 0.6739980887459467, | |
| "learning_rate": 3.131985930973329e-06, | |
| "loss": 0.3041204929351807, | |
| "num_input_tokens_seen": 147584128, | |
| "step": 1445, | |
| "train_runtime": 19894.1589, | |
| "train_tokens_per_second": 7418.465 | |
| }, | |
| { | |
| "epoch": 1.2489233419465977, | |
| "grad_norm": 0.5697654101198504, | |
| "learning_rate": 3.1006539553150727e-06, | |
| "loss": 0.3094919204711914, | |
| "num_input_tokens_seen": 148127248, | |
| "step": 1450, | |
| "train_runtime": 19971.1142, | |
| "train_tokens_per_second": 7417.075 | |
| }, | |
| { | |
| "epoch": 1.2532299741602067, | |
| "grad_norm": 0.7121831368767915, | |
| "learning_rate": 3.0694088992657617e-06, | |
| "loss": 0.31122531890869143, | |
| "num_input_tokens_seen": 148624776, | |
| "step": 1455, | |
| "train_runtime": 20037.7999, | |
| "train_tokens_per_second": 7417.22 | |
| }, | |
| { | |
| "epoch": 1.2575366063738156, | |
| "grad_norm": 0.6188148109575211, | |
| "learning_rate": 3.0382521926901946e-06, | |
| "loss": 0.30587000846862794, | |
| "num_input_tokens_seen": 149084784, | |
| "step": 1460, | |
| "train_runtime": 20101.6364, | |
| "train_tokens_per_second": 7416.55 | |
| }, | |
| { | |
| "epoch": 1.2618432385874248, | |
| "grad_norm": 0.6011511034119341, | |
| "learning_rate": 3.0071852614100427e-06, | |
| "loss": 0.3310825347900391, | |
| "num_input_tokens_seen": 149611248, | |
| "step": 1465, | |
| "train_runtime": 20175.4197, | |
| "train_tokens_per_second": 7415.521 | |
| }, | |
| { | |
| "epoch": 1.2661498708010335, | |
| "grad_norm": 0.7576333400833488, | |
| "learning_rate": 2.9762095271385995e-06, | |
| "loss": 0.31157741546630857, | |
| "num_input_tokens_seen": 150113280, | |
| "step": 1470, | |
| "train_runtime": 20245.8465, | |
| "train_tokens_per_second": 7414.522 | |
| }, | |
| { | |
| "epoch": 1.2704565030146426, | |
| "grad_norm": 0.6244530019532303, | |
| "learning_rate": 2.9453264074157134e-06, | |
| "loss": 0.29090151786804197, | |
| "num_input_tokens_seen": 150580240, | |
| "step": 1475, | |
| "train_runtime": 20317.2584, | |
| "train_tokens_per_second": 7411.445 | |
| }, | |
| { | |
| "epoch": 1.2747631352282516, | |
| "grad_norm": 0.6748375953775086, | |
| "learning_rate": 2.9145373155429263e-06, | |
| "loss": 0.3190771102905273, | |
| "num_input_tokens_seen": 151097168, | |
| "step": 1480, | |
| "train_runtime": 20380.9093, | |
| "train_tokens_per_second": 7413.662 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 0.6010186642646648, | |
| "learning_rate": 2.883843660518787e-06, | |
| "loss": 0.31575181484222414, | |
| "num_input_tokens_seen": 151620032, | |
| "step": 1485, | |
| "train_runtime": 20445.4483, | |
| "train_tokens_per_second": 7415.833 | |
| }, | |
| { | |
| "epoch": 1.2833763996554695, | |
| "grad_norm": 0.6452504243127156, | |
| "learning_rate": 2.853246846974382e-06, | |
| "loss": 0.31672372817993166, | |
| "num_input_tokens_seen": 152160432, | |
| "step": 1490, | |
| "train_runtime": 20522.0963, | |
| "train_tokens_per_second": 7414.468 | |
| }, | |
| { | |
| "epoch": 1.2876830318690784, | |
| "grad_norm": 0.6803092051264894, | |
| "learning_rate": 2.8227482751090445e-06, | |
| "loss": 0.3057991027832031, | |
| "num_input_tokens_seen": 152629192, | |
| "step": 1495, | |
| "train_runtime": 20576.0761, | |
| "train_tokens_per_second": 7417.799 | |
| }, | |
| { | |
| "epoch": 1.2919896640826873, | |
| "grad_norm": 0.6639737361974689, | |
| "learning_rate": 2.7923493406262836e-06, | |
| "loss": 0.3172436237335205, | |
| "num_input_tokens_seen": 153140224, | |
| "step": 1500, | |
| "train_runtime": 20645.2633, | |
| "train_tokens_per_second": 7417.693 | |
| }, | |
| { | |
| "epoch": 1.2962962962962963, | |
| "grad_norm": 0.6227705768459294, | |
| "learning_rate": 2.7620514346699103e-06, | |
| "loss": 0.2983067512512207, | |
| "num_input_tokens_seen": 153656144, | |
| "step": 1505, | |
| "train_runtime": 20719.2912, | |
| "train_tokens_per_second": 7416.091 | |
| }, | |
| { | |
| "epoch": 1.3006029285099052, | |
| "grad_norm": 0.7307371833087898, | |
| "learning_rate": 2.73185594376038e-06, | |
| "loss": 0.31225831508636476, | |
| "num_input_tokens_seen": 154153816, | |
| "step": 1510, | |
| "train_runtime": 20786.1098, | |
| "train_tokens_per_second": 7416.194 | |
| }, | |
| { | |
| "epoch": 1.3049095607235142, | |
| "grad_norm": 0.6309746385583832, | |
| "learning_rate": 2.7017642497313324e-06, | |
| "loss": 0.302036190032959, | |
| "num_input_tokens_seen": 154670920, | |
| "step": 1515, | |
| "train_runtime": 20854.4481, | |
| "train_tokens_per_second": 7416.687 | |
| }, | |
| { | |
| "epoch": 1.309216192937123, | |
| "grad_norm": 0.6416098423112634, | |
| "learning_rate": 2.671777729666363e-06, | |
| "loss": 0.3167644262313843, | |
| "num_input_tokens_seen": 155192016, | |
| "step": 1520, | |
| "train_runtime": 20930.5858, | |
| "train_tokens_per_second": 7414.605 | |
| }, | |
| { | |
| "epoch": 1.313522825150732, | |
| "grad_norm": 0.6604523059153524, | |
| "learning_rate": 2.641897755835997e-06, | |
| "loss": 0.30224175453186036, | |
| "num_input_tokens_seen": 155707640, | |
| "step": 1525, | |
| "train_runtime": 21004.7702, | |
| "train_tokens_per_second": 7412.966 | |
| }, | |
| { | |
| "epoch": 1.3178294573643412, | |
| "grad_norm": 0.6284223784527019, | |
| "learning_rate": 2.6121256956348966e-06, | |
| "loss": 0.31644363403320314, | |
| "num_input_tokens_seen": 156250728, | |
| "step": 1530, | |
| "train_runtime": 21079.0736, | |
| "train_tokens_per_second": 7412.599 | |
| }, | |
| { | |
| "epoch": 1.32213608957795, | |
| "grad_norm": 0.6796758501164194, | |
| "learning_rate": 2.58246291151928e-06, | |
| "loss": 0.3076641082763672, | |
| "num_input_tokens_seen": 156749112, | |
| "step": 1535, | |
| "train_runtime": 21151.8237, | |
| "train_tokens_per_second": 7410.667 | |
| }, | |
| { | |
| "epoch": 1.326442721791559, | |
| "grad_norm": 0.6121495038952994, | |
| "learning_rate": 2.5529107609445737e-06, | |
| "loss": 0.3091968774795532, | |
| "num_input_tokens_seen": 157258624, | |
| "step": 1540, | |
| "train_runtime": 21223.059, | |
| "train_tokens_per_second": 7409.8 | |
| }, | |
| { | |
| "epoch": 1.330749354005168, | |
| "grad_norm": 0.6749697046548327, | |
| "learning_rate": 2.5234705963032917e-06, | |
| "loss": 0.31270341873168944, | |
| "num_input_tokens_seen": 157737392, | |
| "step": 1545, | |
| "train_runtime": 21293.431, | |
| "train_tokens_per_second": 7407.796 | |
| }, | |
| { | |
| "epoch": 1.335055986218777, | |
| "grad_norm": 0.7232023270557488, | |
| "learning_rate": 2.494143764863142e-06, | |
| "loss": 0.29509544372558594, | |
| "num_input_tokens_seen": 158223688, | |
| "step": 1550, | |
| "train_runtime": 21357.9775, | |
| "train_tokens_per_second": 7408.177 | |
| }, | |
| { | |
| "epoch": 1.339362618432386, | |
| "grad_norm": 0.6512809834688581, | |
| "learning_rate": 2.464931608705384e-06, | |
| "loss": 0.28979308605194093, | |
| "num_input_tokens_seen": 158688096, | |
| "step": 1555, | |
| "train_runtime": 21415.5307, | |
| "train_tokens_per_second": 7409.954 | |
| }, | |
| { | |
| "epoch": 1.3436692506459949, | |
| "grad_norm": 0.6519868412863893, | |
| "learning_rate": 2.4358354646633915e-06, | |
| "loss": 0.3088102102279663, | |
| "num_input_tokens_seen": 159187424, | |
| "step": 1560, | |
| "train_runtime": 21483.2722, | |
| "train_tokens_per_second": 7409.831 | |
| }, | |
| { | |
| "epoch": 1.3479758828596038, | |
| "grad_norm": 0.611925227867202, | |
| "learning_rate": 2.4068566642614923e-06, | |
| "loss": 0.29939565658569334, | |
| "num_input_tokens_seen": 159669688, | |
| "step": 1565, | |
| "train_runtime": 21550.1291, | |
| "train_tokens_per_second": 7409.222 | |
| }, | |
| { | |
| "epoch": 1.3522825150732127, | |
| "grad_norm": 0.6563878117514377, | |
| "learning_rate": 2.3779965336540237e-06, | |
| "loss": 0.3042983770370483, | |
| "num_input_tokens_seen": 160172312, | |
| "step": 1570, | |
| "train_runtime": 21611.6367, | |
| "train_tokens_per_second": 7411.392 | |
| }, | |
| { | |
| "epoch": 1.3565891472868217, | |
| "grad_norm": 0.6120474875361092, | |
| "learning_rate": 2.3492563935646493e-06, | |
| "loss": 0.2935898542404175, | |
| "num_input_tokens_seen": 160708952, | |
| "step": 1575, | |
| "train_runtime": 21688.22, | |
| "train_tokens_per_second": 7409.965 | |
| }, | |
| { | |
| "epoch": 1.3608957795004306, | |
| "grad_norm": 0.607518756395221, | |
| "learning_rate": 2.3206375592259205e-06, | |
| "loss": 0.30011003017425536, | |
| "num_input_tokens_seen": 161217216, | |
| "step": 1580, | |
| "train_runtime": 21761.1281, | |
| "train_tokens_per_second": 7408.495 | |
| }, | |
| { | |
| "epoch": 1.3652024117140396, | |
| "grad_norm": 0.6300850271418421, | |
| "learning_rate": 2.2921413403190774e-06, | |
| "loss": 0.3024036645889282, | |
| "num_input_tokens_seen": 161705312, | |
| "step": 1585, | |
| "train_runtime": 21836.3225, | |
| "train_tokens_per_second": 7405.336 | |
| }, | |
| { | |
| "epoch": 1.3695090439276485, | |
| "grad_norm": 0.6230690301238715, | |
| "learning_rate": 2.263769040914125e-06, | |
| "loss": 0.30027027130126954, | |
| "num_input_tokens_seen": 162163072, | |
| "step": 1590, | |
| "train_runtime": 21896.5811, | |
| "train_tokens_per_second": 7405.863 | |
| }, | |
| { | |
| "epoch": 1.3738156761412577, | |
| "grad_norm": 0.6928798019826016, | |
| "learning_rate": 2.2355219594101483e-06, | |
| "loss": 0.3255805492401123, | |
| "num_input_tokens_seen": 162678312, | |
| "step": 1595, | |
| "train_runtime": 21966.5196, | |
| "train_tokens_per_second": 7405.739 | |
| }, | |
| { | |
| "epoch": 1.3781223083548664, | |
| "grad_norm": 0.6363368596470697, | |
| "learning_rate": 2.2074013884758993e-06, | |
| "loss": 0.31427993774414065, | |
| "num_input_tokens_seen": 163224632, | |
| "step": 1600, | |
| "train_runtime": 22032.9938, | |
| "train_tokens_per_second": 7408.191 | |
| }, | |
| { | |
| "epoch": 1.3824289405684755, | |
| "grad_norm": 0.6419634556123296, | |
| "learning_rate": 2.179408614990635e-06, | |
| "loss": 0.3022661924362183, | |
| "num_input_tokens_seen": 163699352, | |
| "step": 1605, | |
| "train_runtime": 22099.9328, | |
| "train_tokens_per_second": 7407.233 | |
| }, | |
| { | |
| "epoch": 1.3867355727820845, | |
| "grad_norm": 0.5818964338969693, | |
| "learning_rate": 2.1515449199852307e-06, | |
| "loss": 0.31579549312591554, | |
| "num_input_tokens_seen": 164236200, | |
| "step": 1610, | |
| "train_runtime": 22170.2317, | |
| "train_tokens_per_second": 7407.96 | |
| }, | |
| { | |
| "epoch": 1.3910422049956934, | |
| "grad_norm": 0.6207480539135273, | |
| "learning_rate": 2.1238115785835512e-06, | |
| "loss": 0.31688437461853025, | |
| "num_input_tokens_seen": 164733432, | |
| "step": 1615, | |
| "train_runtime": 22241.2909, | |
| "train_tokens_per_second": 7406.649 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 0.6195171977696273, | |
| "learning_rate": 2.0962098599441065e-06, | |
| "loss": 0.3063546657562256, | |
| "num_input_tokens_seen": 165233040, | |
| "step": 1620, | |
| "train_runtime": 22304.129, | |
| "train_tokens_per_second": 7408.182 | |
| }, | |
| { | |
| "epoch": 1.3996554694229113, | |
| "grad_norm": 0.6192636575391592, | |
| "learning_rate": 2.068741027201961e-06, | |
| "loss": 0.28777213096618653, | |
| "num_input_tokens_seen": 165704392, | |
| "step": 1625, | |
| "train_runtime": 22371.6308, | |
| "train_tokens_per_second": 7406.898 | |
| }, | |
| { | |
| "epoch": 1.4039621016365202, | |
| "grad_norm": 0.5840180544779381, | |
| "learning_rate": 2.0414063374109326e-06, | |
| "loss": 0.3152973413467407, | |
| "num_input_tokens_seen": 166213024, | |
| "step": 1630, | |
| "train_runtime": 22439.0022, | |
| "train_tokens_per_second": 7407.327 | |
| }, | |
| { | |
| "epoch": 1.4082687338501292, | |
| "grad_norm": 0.6182614190184378, | |
| "learning_rate": 2.0142070414860704e-06, | |
| "loss": 0.29006712436676024, | |
| "num_input_tokens_seen": 166683888, | |
| "step": 1635, | |
| "train_runtime": 22498.0177, | |
| "train_tokens_per_second": 7408.826 | |
| }, | |
| { | |
| "epoch": 1.4125753660637381, | |
| "grad_norm": 0.6297841421388773, | |
| "learning_rate": 1.9871443841464004e-06, | |
| "loss": 0.3009150505065918, | |
| "num_input_tokens_seen": 167208664, | |
| "step": 1640, | |
| "train_runtime": 22570.3637, | |
| "train_tokens_per_second": 7408.328 | |
| }, | |
| { | |
| "epoch": 1.416881998277347, | |
| "grad_norm": 0.6550334755642108, | |
| "learning_rate": 1.9602196038579774e-06, | |
| "loss": 0.308090877532959, | |
| "num_input_tokens_seen": 167711896, | |
| "step": 1645, | |
| "train_runtime": 22636.4806, | |
| "train_tokens_per_second": 7408.921 | |
| }, | |
| { | |
| "epoch": 1.421188630490956, | |
| "grad_norm": 0.6656593254119286, | |
| "learning_rate": 1.933433932777193e-06, | |
| "loss": 0.31527588367462156, | |
| "num_input_tokens_seen": 168253824, | |
| "step": 1650, | |
| "train_runtime": 22710.0595, | |
| "train_tokens_per_second": 7408.78 | |
| }, | |
| { | |
| "epoch": 1.425495262704565, | |
| "grad_norm": 0.6870898791567921, | |
| "learning_rate": 1.9067885966943983e-06, | |
| "loss": 0.2953861474990845, | |
| "num_input_tokens_seen": 168753216, | |
| "step": 1655, | |
| "train_runtime": 22778.2895, | |
| "train_tokens_per_second": 7408.511 | |
| }, | |
| { | |
| "epoch": 1.429801894918174, | |
| "grad_norm": 0.6715846070836667, | |
| "learning_rate": 1.880284814977807e-06, | |
| "loss": 0.3124719142913818, | |
| "num_input_tokens_seen": 169237536, | |
| "step": 1660, | |
| "train_runtime": 22844.2856, | |
| "train_tokens_per_second": 7408.309 | |
| }, | |
| { | |
| "epoch": 1.4341085271317828, | |
| "grad_norm": 0.5885167014186917, | |
| "learning_rate": 1.8539238005176912e-06, | |
| "loss": 0.28069252967834474, | |
| "num_input_tokens_seen": 169742192, | |
| "step": 1665, | |
| "train_runtime": 22917.8396, | |
| "train_tokens_per_second": 7406.553 | |
| }, | |
| { | |
| "epoch": 1.438415159345392, | |
| "grad_norm": 0.6406321645578682, | |
| "learning_rate": 1.8277067596708793e-06, | |
| "loss": 0.30810205936431884, | |
| "num_input_tokens_seen": 170221760, | |
| "step": 1670, | |
| "train_runtime": 22987.4305, | |
| "train_tokens_per_second": 7404.993 | |
| }, | |
| { | |
| "epoch": 1.442721791559001, | |
| "grad_norm": 0.6177250258017325, | |
| "learning_rate": 1.8016348922055448e-06, | |
| "loss": 0.3010256767272949, | |
| "num_input_tokens_seen": 170720016, | |
| "step": 1675, | |
| "train_runtime": 23056.7807, | |
| "train_tokens_per_second": 7404.33 | |
| }, | |
| { | |
| "epoch": 1.4470284237726099, | |
| "grad_norm": 0.6542576135864537, | |
| "learning_rate": 1.7757093912463058e-06, | |
| "loss": 0.30887162685394287, | |
| "num_input_tokens_seen": 171232760, | |
| "step": 1680, | |
| "train_runtime": 23123.5146, | |
| "train_tokens_per_second": 7405.136 | |
| }, | |
| { | |
| "epoch": 1.4513350559862188, | |
| "grad_norm": 0.6205881235437254, | |
| "learning_rate": 1.7499314432196257e-06, | |
| "loss": 0.3016911506652832, | |
| "num_input_tokens_seen": 171731496, | |
| "step": 1685, | |
| "train_runtime": 23196.0481, | |
| "train_tokens_per_second": 7403.481 | |
| }, | |
| { | |
| "epoch": 1.4556416881998278, | |
| "grad_norm": 0.6593735329024515, | |
| "learning_rate": 1.7243022277995109e-06, | |
| "loss": 0.3081092357635498, | |
| "num_input_tokens_seen": 172245912, | |
| "step": 1690, | |
| "train_runtime": 23270.0505, | |
| "train_tokens_per_second": 7402.043 | |
| }, | |
| { | |
| "epoch": 1.4599483204134367, | |
| "grad_norm": 0.6421196878908372, | |
| "learning_rate": 1.698822917853532e-06, | |
| "loss": 0.3349776268005371, | |
| "num_input_tokens_seen": 172780776, | |
| "step": 1695, | |
| "train_runtime": 23338.4767, | |
| "train_tokens_per_second": 7403.259 | |
| }, | |
| { | |
| "epoch": 1.4642549526270456, | |
| "grad_norm": 0.6393067681048293, | |
| "learning_rate": 1.6734946793891498e-06, | |
| "loss": 0.2948931694030762, | |
| "num_input_tokens_seen": 173260968, | |
| "step": 1700, | |
| "train_runtime": 23393.7968, | |
| "train_tokens_per_second": 7406.278 | |
| }, | |
| { | |
| "epoch": 1.4685615848406546, | |
| "grad_norm": 0.626157385220703, | |
| "learning_rate": 1.6483186715003523e-06, | |
| "loss": 0.3255448818206787, | |
| "num_input_tokens_seen": 173774184, | |
| "step": 1705, | |
| "train_runtime": 23453.7592, | |
| "train_tokens_per_second": 7409.225 | |
| }, | |
| { | |
| "epoch": 1.4728682170542635, | |
| "grad_norm": 0.6000516868104079, | |
| "learning_rate": 1.623296046314612e-06, | |
| "loss": 0.3152692556381226, | |
| "num_input_tokens_seen": 174299296, | |
| "step": 1710, | |
| "train_runtime": 23525.525, | |
| "train_tokens_per_second": 7408.944 | |
| }, | |
| { | |
| "epoch": 1.4771748492678725, | |
| "grad_norm": 0.6223596745571496, | |
| "learning_rate": 1.5984279489401655e-06, | |
| "loss": 0.3075227737426758, | |
| "num_input_tokens_seen": 174801056, | |
| "step": 1715, | |
| "train_runtime": 23599.8686, | |
| "train_tokens_per_second": 7406.866 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 0.6218964277556629, | |
| "learning_rate": 1.5737155174136042e-06, | |
| "loss": 0.2994909524917603, | |
| "num_input_tokens_seen": 175312064, | |
| "step": 1720, | |
| "train_runtime": 23669.1828, | |
| "train_tokens_per_second": 7406.765 | |
| }, | |
| { | |
| "epoch": 1.4857881136950906, | |
| "grad_norm": 0.6927663365147764, | |
| "learning_rate": 1.5491598826477967e-06, | |
| "loss": 0.2942391872406006, | |
| "num_input_tokens_seen": 175813120, | |
| "step": 1725, | |
| "train_runtime": 23735.9044, | |
| "train_tokens_per_second": 7407.054 | |
| }, | |
| { | |
| "epoch": 1.4900947459086993, | |
| "grad_norm": 0.6770702789982874, | |
| "learning_rate": 1.5247621683801344e-06, | |
| "loss": 0.30359663963317873, | |
| "num_input_tokens_seen": 176311208, | |
| "step": 1730, | |
| "train_runtime": 23813.1099, | |
| "train_tokens_per_second": 7403.956 | |
| }, | |
| { | |
| "epoch": 1.4944013781223084, | |
| "grad_norm": 0.6876682910616844, | |
| "learning_rate": 1.500523491121108e-06, | |
| "loss": 0.30945577621459963, | |
| "num_input_tokens_seen": 176837256, | |
| "step": 1735, | |
| "train_runtime": 23875.8608, | |
| "train_tokens_per_second": 7406.529 | |
| }, | |
| { | |
| "epoch": 1.4987080103359174, | |
| "grad_norm": 0.6409833412888383, | |
| "learning_rate": 1.476444960103211e-06, | |
| "loss": 0.3017910957336426, | |
| "num_input_tokens_seen": 177342432, | |
| "step": 1740, | |
| "train_runtime": 23950.1948, | |
| "train_tokens_per_second": 7404.634 | |
| }, | |
| { | |
| "epoch": 1.5030146425495263, | |
| "grad_norm": 0.6943474108994365, | |
| "learning_rate": 1.4525276772301761e-06, | |
| "loss": 0.2982076644897461, | |
| "num_input_tokens_seen": 177837496, | |
| "step": 1745, | |
| "train_runtime": 24022.3232, | |
| "train_tokens_per_second": 7403.01 | |
| }, | |
| { | |
| "epoch": 1.5073212747631353, | |
| "grad_norm": 0.5799829679643863, | |
| "learning_rate": 1.4287727370265558e-06, | |
| "loss": 0.305989408493042, | |
| "num_input_tokens_seen": 178367400, | |
| "step": 1750, | |
| "train_runtime": 24098.0102, | |
| "train_tokens_per_second": 7401.748 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 0.6398941855178756, | |
| "learning_rate": 1.4051812265876257e-06, | |
| "loss": 0.31029796600341797, | |
| "num_input_tokens_seen": 178879808, | |
| "step": 1755, | |
| "train_runtime": 24168.3597, | |
| "train_tokens_per_second": 7401.405 | |
| }, | |
| { | |
| "epoch": 1.5159345391903531, | |
| "grad_norm": 0.6325656951057348, | |
| "learning_rate": 1.381754225529639e-06, | |
| "loss": 0.29440352916717527, | |
| "num_input_tokens_seen": 179399168, | |
| "step": 1760, | |
| "train_runtime": 24235.603, | |
| "train_tokens_per_second": 7402.299 | |
| }, | |
| { | |
| "epoch": 1.520241171403962, | |
| "grad_norm": 0.7760734610328013, | |
| "learning_rate": 1.3584928059404207e-06, | |
| "loss": 0.3081679344177246, | |
| "num_input_tokens_seen": 179913984, | |
| "step": 1765, | |
| "train_runtime": 24301.9048, | |
| "train_tokens_per_second": 7403.287 | |
| }, | |
| { | |
| "epoch": 1.524547803617571, | |
| "grad_norm": 0.6020927671705726, | |
| "learning_rate": 1.3353980323303067e-06, | |
| "loss": 0.28179948329925536, | |
| "num_input_tokens_seen": 180380992, | |
| "step": 1770, | |
| "train_runtime": 24364.2331, | |
| "train_tokens_per_second": 7403.516 | |
| }, | |
| { | |
| "epoch": 1.52885443583118, | |
| "grad_norm": 0.6775155388063839, | |
| "learning_rate": 1.3124709615834263e-06, | |
| "loss": 0.3199465274810791, | |
| "num_input_tokens_seen": 180895408, | |
| "step": 1775, | |
| "train_runtime": 24428.2069, | |
| "train_tokens_per_second": 7405.186 | |
| }, | |
| { | |
| "epoch": 1.5331610680447891, | |
| "grad_norm": 0.5909440385527984, | |
| "learning_rate": 1.2897126429093354e-06, | |
| "loss": 0.31137895584106445, | |
| "num_input_tokens_seen": 181405216, | |
| "step": 1780, | |
| "train_runtime": 24501.3699, | |
| "train_tokens_per_second": 7403.881 | |
| }, | |
| { | |
| "epoch": 1.5374677002583979, | |
| "grad_norm": 0.6389531630672336, | |
| "learning_rate": 1.2671241177950078e-06, | |
| "loss": 0.2975735902786255, | |
| "num_input_tokens_seen": 181929944, | |
| "step": 1785, | |
| "train_runtime": 24574.289, | |
| "train_tokens_per_second": 7403.264 | |
| }, | |
| { | |
| "epoch": 1.541774332472007, | |
| "grad_norm": 0.6814051618467906, | |
| "learning_rate": 1.2447064199571657e-06, | |
| "loss": 0.30163776874542236, | |
| "num_input_tokens_seen": 182464632, | |
| "step": 1790, | |
| "train_runtime": 24647.5158, | |
| "train_tokens_per_second": 7402.962 | |
| }, | |
| { | |
| "epoch": 1.5460809646856157, | |
| "grad_norm": 0.7477112566892565, | |
| "learning_rate": 1.2224605752949786e-06, | |
| "loss": 0.2984232664108276, | |
| "num_input_tokens_seen": 182958712, | |
| "step": 1795, | |
| "train_runtime": 24708.0115, | |
| "train_tokens_per_second": 7404.834 | |
| }, | |
| { | |
| "epoch": 1.550387596899225, | |
| "grad_norm": 0.6529963125920666, | |
| "learning_rate": 1.2003876018431133e-06, | |
| "loss": 0.29271659851074217, | |
| "num_input_tokens_seen": 183435376, | |
| "step": 1800, | |
| "train_runtime": 24760.234, | |
| "train_tokens_per_second": 7408.467 | |
| }, | |
| { | |
| "epoch": 1.5546942291128336, | |
| "grad_norm": 0.5835418021024796, | |
| "learning_rate": 1.1784885097251474e-06, | |
| "loss": 0.2989810466766357, | |
| "num_input_tokens_seen": 183939080, | |
| "step": 1805, | |
| "train_runtime": 24816.2993, | |
| "train_tokens_per_second": 7412.027 | |
| }, | |
| { | |
| "epoch": 1.5590008613264428, | |
| "grad_norm": 0.6843482118934102, | |
| "learning_rate": 1.1567643011073393e-06, | |
| "loss": 0.31319284439086914, | |
| "num_input_tokens_seen": 184492224, | |
| "step": 1810, | |
| "train_runtime": 24887.0045, | |
| "train_tokens_per_second": 7413.195 | |
| }, | |
| { | |
| "epoch": 1.5633074935400517, | |
| "grad_norm": 0.6189022938446993, | |
| "learning_rate": 1.1352159701527743e-06, | |
| "loss": 0.3045823574066162, | |
| "num_input_tokens_seen": 185017248, | |
| "step": 1815, | |
| "train_runtime": 24965.7799, | |
| "train_tokens_per_second": 7410.834 | |
| }, | |
| { | |
| "epoch": 1.5676141257536607, | |
| "grad_norm": 0.6412913917213614, | |
| "learning_rate": 1.1138445029758594e-06, | |
| "loss": 0.29555253982543944, | |
| "num_input_tokens_seen": 185525392, | |
| "step": 1820, | |
| "train_runtime": 25039.0119, | |
| "train_tokens_per_second": 7409.453 | |
| }, | |
| { | |
| "epoch": 1.5719207579672696, | |
| "grad_norm": 0.6383974503891945, | |
| "learning_rate": 1.0926508775971995e-06, | |
| "loss": 0.29001967906951903, | |
| "num_input_tokens_seen": 186004920, | |
| "step": 1825, | |
| "train_runtime": 25108.8304, | |
| "train_tokens_per_second": 7407.948 | |
| }, | |
| { | |
| "epoch": 1.5762273901808785, | |
| "grad_norm": 0.5896126682457972, | |
| "learning_rate": 1.0716360638988416e-06, | |
| "loss": 0.2898190259933472, | |
| "num_input_tokens_seen": 186528776, | |
| "step": 1830, | |
| "train_runtime": 25173.4594, | |
| "train_tokens_per_second": 7409.739 | |
| }, | |
| { | |
| "epoch": 1.5805340223944875, | |
| "grad_norm": 0.6250987792113757, | |
| "learning_rate": 1.0508010235798904e-06, | |
| "loss": 0.3022593975067139, | |
| "num_input_tokens_seen": 187030096, | |
| "step": 1835, | |
| "train_runtime": 25237.5065, | |
| "train_tokens_per_second": 7410.799 | |
| }, | |
| { | |
| "epoch": 1.5848406546080964, | |
| "grad_norm": 0.6978498804363219, | |
| "learning_rate": 1.0301467101124956e-06, | |
| "loss": 0.29773921966552735, | |
| "num_input_tokens_seen": 187530704, | |
| "step": 1840, | |
| "train_runtime": 25300.156, | |
| "train_tokens_per_second": 7412.235 | |
| }, | |
| { | |
| "epoch": 1.5891472868217056, | |
| "grad_norm": 0.733936599658224, | |
| "learning_rate": 1.0096740686982192e-06, | |
| "loss": 0.2982914447784424, | |
| "num_input_tokens_seen": 187997312, | |
| "step": 1845, | |
| "train_runtime": 25374.7743, | |
| "train_tokens_per_second": 7408.827 | |
| }, | |
| { | |
| "epoch": 1.5934539190353143, | |
| "grad_norm": 0.6380738781500264, | |
| "learning_rate": 9.893840362247809e-07, | |
| "loss": 0.292753005027771, | |
| "num_input_tokens_seen": 188486784, | |
| "step": 1850, | |
| "train_runtime": 25443.3247, | |
| "train_tokens_per_second": 7408.104 | |
| }, | |
| { | |
| "epoch": 1.5977605512489235, | |
| "grad_norm": 0.6343838191463229, | |
| "learning_rate": 9.692775412231863e-07, | |
| "loss": 0.3059796571731567, | |
| "num_input_tokens_seen": 189005680, | |
| "step": 1855, | |
| "train_runtime": 25518.9616, | |
| "train_tokens_per_second": 7406.48 | |
| }, | |
| { | |
| "epoch": 1.6020671834625322, | |
| "grad_norm": 0.673292925225458, | |
| "learning_rate": 9.493555038252273e-07, | |
| "loss": 0.29838857650756834, | |
| "num_input_tokens_seen": 189510720, | |
| "step": 1860, | |
| "train_runtime": 25583.3652, | |
| "train_tokens_per_second": 7407.576 | |
| }, | |
| { | |
| "epoch": 1.6063738156761413, | |
| "grad_norm": 0.7066024445607715, | |
| "learning_rate": 9.296188357213804e-07, | |
| "loss": 0.3135526180267334, | |
| "num_input_tokens_seen": 190005232, | |
| "step": 1865, | |
| "train_runtime": 25655.2288, | |
| "train_tokens_per_second": 7406.102 | |
| }, | |
| { | |
| "epoch": 1.61068044788975, | |
| "grad_norm": 0.6451799213539555, | |
| "learning_rate": 9.100684401190829e-07, | |
| "loss": 0.2981297492980957, | |
| "num_input_tokens_seen": 190519568, | |
| "step": 1870, | |
| "train_runtime": 25729.8902, | |
| "train_tokens_per_second": 7404.601 | |
| }, | |
| { | |
| "epoch": 1.6149870801033592, | |
| "grad_norm": 0.6386914239692977, | |
| "learning_rate": 8.907052117013981e-07, | |
| "loss": 0.3049411058425903, | |
| "num_input_tokens_seen": 191051376, | |
| "step": 1875, | |
| "train_runtime": 25804.946, | |
| "train_tokens_per_second": 7403.673 | |
| }, | |
| { | |
| "epoch": 1.6192937123169682, | |
| "grad_norm": 0.7923656421587169, | |
| "learning_rate": 8.715300365860784e-07, | |
| "loss": 0.2883660316467285, | |
| "num_input_tokens_seen": 191555640, | |
| "step": 1880, | |
| "train_runtime": 25872.9743, | |
| "train_tokens_per_second": 7403.696 | |
| }, | |
| { | |
| "epoch": 1.623600344530577, | |
| "grad_norm": 0.5861974629399523, | |
| "learning_rate": 8.525437922850033e-07, | |
| "loss": 0.2918602228164673, | |
| "num_input_tokens_seen": 192040232, | |
| "step": 1885, | |
| "train_runtime": 25940.9831, | |
| "train_tokens_per_second": 7402.967 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.6536777995059396, | |
| "learning_rate": 8.337473476640318e-07, | |
| "loss": 0.296920108795166, | |
| "num_input_tokens_seen": 192555328, | |
| "step": 1890, | |
| "train_runtime": 25999.4311, | |
| "train_tokens_per_second": 7406.136 | |
| }, | |
| { | |
| "epoch": 1.632213608957795, | |
| "grad_norm": 0.709106460699193, | |
| "learning_rate": 8.151415629032338e-07, | |
| "loss": 0.3084681749343872, | |
| "num_input_tokens_seen": 193047904, | |
| "step": 1895, | |
| "train_runtime": 26071.7257, | |
| "train_tokens_per_second": 7404.493 | |
| }, | |
| { | |
| "epoch": 1.636520241171404, | |
| "grad_norm": 0.6265073195440819, | |
| "learning_rate": 7.967272894575312e-07, | |
| "loss": 0.3049050807952881, | |
| "num_input_tokens_seen": 193578488, | |
| "step": 1900, | |
| "train_runtime": 26142.2043, | |
| "train_tokens_per_second": 7404.827 | |
| }, | |
| { | |
| "epoch": 1.6408268733850129, | |
| "grad_norm": 0.5901143607833178, | |
| "learning_rate": 7.785053700177275e-07, | |
| "loss": 0.31264266967773435, | |
| "num_input_tokens_seen": 194106056, | |
| "step": 1905, | |
| "train_runtime": 26207.0856, | |
| "train_tokens_per_second": 7406.625 | |
| }, | |
| { | |
| "epoch": 1.645133505598622, | |
| "grad_norm": 0.6070076296317176, | |
| "learning_rate": 7.604766384719487e-07, | |
| "loss": 0.32028186321258545, | |
| "num_input_tokens_seen": 194637600, | |
| "step": 1910, | |
| "train_runtime": 26271.2516, | |
| "train_tokens_per_second": 7408.768 | |
| }, | |
| { | |
| "epoch": 1.6494401378122308, | |
| "grad_norm": 0.6800282511844107, | |
| "learning_rate": 7.426419198674773e-07, | |
| "loss": 0.29713547229766846, | |
| "num_input_tokens_seen": 195135888, | |
| "step": 1915, | |
| "train_runtime": 26344.3031, | |
| "train_tokens_per_second": 7407.138 | |
| }, | |
| { | |
| "epoch": 1.65374677002584, | |
| "grad_norm": 0.648137387161232, | |
| "learning_rate": 7.250020303730015e-07, | |
| "loss": 0.318635368347168, | |
| "num_input_tokens_seen": 195658344, | |
| "step": 1920, | |
| "train_runtime": 26410.9309, | |
| "train_tokens_per_second": 7408.234 | |
| }, | |
| { | |
| "epoch": 1.6580534022394486, | |
| "grad_norm": 0.7101813432427155, | |
| "learning_rate": 7.075577772412607e-07, | |
| "loss": 0.29038522243499754, | |
| "num_input_tokens_seen": 196159872, | |
| "step": 1925, | |
| "train_runtime": 26475.9547, | |
| "train_tokens_per_second": 7408.982 | |
| }, | |
| { | |
| "epoch": 1.6623600344530578, | |
| "grad_norm": 0.5997169278780675, | |
| "learning_rate": 6.903099587721024e-07, | |
| "loss": 0.3107494831085205, | |
| "num_input_tokens_seen": 196617976, | |
| "step": 1930, | |
| "train_runtime": 26543.6519, | |
| "train_tokens_per_second": 7407.345 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.5757077293663654, | |
| "learning_rate": 6.732593642759533e-07, | |
| "loss": 0.29712848663330077, | |
| "num_input_tokens_seen": 197100472, | |
| "step": 1935, | |
| "train_runtime": 26613.1626, | |
| "train_tokens_per_second": 7406.127 | |
| }, | |
| { | |
| "epoch": 1.6709732988802757, | |
| "grad_norm": 0.6672790463782956, | |
| "learning_rate": 6.564067740376951e-07, | |
| "loss": 0.29893629550933837, | |
| "num_input_tokens_seen": 197626872, | |
| "step": 1940, | |
| "train_runtime": 26679.8578, | |
| "train_tokens_per_second": 7407.344 | |
| }, | |
| { | |
| "epoch": 1.6752799310938846, | |
| "grad_norm": 0.6497288298918169, | |
| "learning_rate": 6.397529592809615e-07, | |
| "loss": 0.31168351173400877, | |
| "num_input_tokens_seen": 198123984, | |
| "step": 1945, | |
| "train_runtime": 26746.7628, | |
| "train_tokens_per_second": 7407.4 | |
| }, | |
| { | |
| "epoch": 1.6795865633074936, | |
| "grad_norm": 0.8599577731412777, | |
| "learning_rate": 6.232986821328374e-07, | |
| "loss": 0.29562864303588865, | |
| "num_input_tokens_seen": 198655680, | |
| "step": 1950, | |
| "train_runtime": 26823.6042, | |
| "train_tokens_per_second": 7406.003 | |
| }, | |
| { | |
| "epoch": 1.6838931955211025, | |
| "grad_norm": 0.650247687298536, | |
| "learning_rate": 6.070446955889853e-07, | |
| "loss": 0.2916395664215088, | |
| "num_input_tokens_seen": 199167544, | |
| "step": 1955, | |
| "train_runtime": 26896.5949, | |
| "train_tokens_per_second": 7404.935 | |
| }, | |
| { | |
| "epoch": 1.6881998277347114, | |
| "grad_norm": 0.642603913967143, | |
| "learning_rate": 5.909917434791884e-07, | |
| "loss": 0.30132670402526857, | |
| "num_input_tokens_seen": 199673096, | |
| "step": 1960, | |
| "train_runtime": 26963.0457, | |
| "train_tokens_per_second": 7405.436 | |
| }, | |
| { | |
| "epoch": 1.6925064599483204, | |
| "grad_norm": 0.6050771009316471, | |
| "learning_rate": 5.75140560433306e-07, | |
| "loss": 0.3065808296203613, | |
| "num_input_tokens_seen": 200183744, | |
| "step": 1965, | |
| "train_runtime": 27028.667, | |
| "train_tokens_per_second": 7406.349 | |
| }, | |
| { | |
| "epoch": 1.6968130921619293, | |
| "grad_norm": 0.7019989661295893, | |
| "learning_rate": 5.594918718476589e-07, | |
| "loss": 0.30456061363220216, | |
| "num_input_tokens_seen": 200697696, | |
| "step": 1970, | |
| "train_runtime": 27101.6197, | |
| "train_tokens_per_second": 7405.376 | |
| }, | |
| { | |
| "epoch": 1.7011197243755385, | |
| "grad_norm": 0.6625920188017488, | |
| "learning_rate": 5.440463938518304e-07, | |
| "loss": 0.3014059066772461, | |
| "num_input_tokens_seen": 201206880, | |
| "step": 1975, | |
| "train_runtime": 27172.9204, | |
| "train_tokens_per_second": 7404.684 | |
| }, | |
| { | |
| "epoch": 1.7054263565891472, | |
| "grad_norm": 0.6748217547745429, | |
| "learning_rate": 5.288048332758961e-07, | |
| "loss": 0.2906249284744263, | |
| "num_input_tokens_seen": 201692448, | |
| "step": 1980, | |
| "train_runtime": 27245.0858, | |
| "train_tokens_per_second": 7402.893 | |
| }, | |
| { | |
| "epoch": 1.7097329888027564, | |
| "grad_norm": 0.6493936382897342, | |
| "learning_rate": 5.137678876180746e-07, | |
| "loss": 0.3080711841583252, | |
| "num_input_tokens_seen": 202206200, | |
| "step": 1985, | |
| "train_runtime": 27302.2262, | |
| "train_tokens_per_second": 7406.217 | |
| }, | |
| { | |
| "epoch": 1.714039621016365, | |
| "grad_norm": 0.6134604818431901, | |
| "learning_rate": 4.989362450128133e-07, | |
| "loss": 0.3220229148864746, | |
| "num_input_tokens_seen": 202716392, | |
| "step": 1990, | |
| "train_runtime": 27369.6446, | |
| "train_tokens_per_second": 7406.614 | |
| }, | |
| { | |
| "epoch": 1.7183462532299743, | |
| "grad_norm": 0.646962297611449, | |
| "learning_rate": 4.843105841992895e-07, | |
| "loss": 0.31665678024291993, | |
| "num_input_tokens_seen": 203222616, | |
| "step": 1995, | |
| "train_runtime": 27428.8755, | |
| "train_tokens_per_second": 7409.076 | |
| }, | |
| { | |
| "epoch": 1.722652885443583, | |
| "grad_norm": 0.6371911776149713, | |
| "learning_rate": 4.698915744903559e-07, | |
| "loss": 0.2909515380859375, | |
| "num_input_tokens_seen": 203712976, | |
| "step": 2000, | |
| "train_runtime": 27503.273, | |
| "train_tokens_per_second": 7406.863 | |
| }, | |
| { | |
| "epoch": 1.7269595176571921, | |
| "grad_norm": 0.6718397087570519, | |
| "learning_rate": 4.5567987574190677e-07, | |
| "loss": 0.299733304977417, | |
| "num_input_tokens_seen": 204220640, | |
| "step": 2005, | |
| "train_runtime": 27582.7926, | |
| "train_tokens_per_second": 7403.915 | |
| }, | |
| { | |
| "epoch": 1.731266149870801, | |
| "grad_norm": 0.5969000639721187, | |
| "learning_rate": 4.416761383226864e-07, | |
| "loss": 0.2933858156204224, | |
| "num_input_tokens_seen": 204740576, | |
| "step": 2010, | |
| "train_runtime": 27655.0348, | |
| "train_tokens_per_second": 7403.374 | |
| }, | |
| { | |
| "epoch": 1.73557278208441, | |
| "grad_norm": 0.5803238112892645, | |
| "learning_rate": 4.278810030845193e-07, | |
| "loss": 0.2882672309875488, | |
| "num_input_tokens_seen": 205261256, | |
| "step": 2015, | |
| "train_runtime": 27722.013, | |
| "train_tokens_per_second": 7404.27 | |
| }, | |
| { | |
| "epoch": 1.739879414298019, | |
| "grad_norm": 0.5931274387562315, | |
| "learning_rate": 4.1429510133298714e-07, | |
| "loss": 0.300402307510376, | |
| "num_input_tokens_seen": 205763456, | |
| "step": 2020, | |
| "train_runtime": 27786.5499, | |
| "train_tokens_per_second": 7405.146 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 0.6224395945864603, | |
| "learning_rate": 4.0091905479853865e-07, | |
| "loss": 0.30848591327667235, | |
| "num_input_tokens_seen": 206254632, | |
| "step": 2025, | |
| "train_runtime": 27854.9195, | |
| "train_tokens_per_second": 7404.603 | |
| }, | |
| { | |
| "epoch": 1.7484926787252368, | |
| "grad_norm": 0.7690853275407137, | |
| "learning_rate": 3.877534756080353e-07, | |
| "loss": 0.30381178855895996, | |
| "num_input_tokens_seen": 206761200, | |
| "step": 2030, | |
| "train_runtime": 27925.9518, | |
| "train_tokens_per_second": 7403.909 | |
| }, | |
| { | |
| "epoch": 1.7527993109388458, | |
| "grad_norm": 0.588121336370943, | |
| "learning_rate": 3.747989662567403e-07, | |
| "loss": 0.30392751693725584, | |
| "num_input_tokens_seen": 207254168, | |
| "step": 2035, | |
| "train_runtime": 27994.4378, | |
| "train_tokens_per_second": 7403.405 | |
| }, | |
| { | |
| "epoch": 1.757105943152455, | |
| "grad_norm": 0.6086100201180733, | |
| "learning_rate": 3.620561195807454e-07, | |
| "loss": 0.2981263637542725, | |
| "num_input_tokens_seen": 207724624, | |
| "step": 2040, | |
| "train_runtime": 28064.886, | |
| "train_tokens_per_second": 7401.584 | |
| }, | |
| { | |
| "epoch": 1.7614125753660637, | |
| "grad_norm": 0.5837314444107731, | |
| "learning_rate": 3.4952551872984295e-07, | |
| "loss": 0.2957895278930664, | |
| "num_input_tokens_seen": 208230432, | |
| "step": 2045, | |
| "train_runtime": 28140.0297, | |
| "train_tokens_per_second": 7399.794 | |
| }, | |
| { | |
| "epoch": 1.7657192075796728, | |
| "grad_norm": 0.7083602081207774, | |
| "learning_rate": 3.372077371408361e-07, | |
| "loss": 0.31419479846954346, | |
| "num_input_tokens_seen": 208756176, | |
| "step": 2050, | |
| "train_runtime": 28203.3818, | |
| "train_tokens_per_second": 7401.814 | |
| }, | |
| { | |
| "epoch": 1.7700258397932815, | |
| "grad_norm": 0.6007868998583191, | |
| "learning_rate": 3.2510333851129895e-07, | |
| "loss": 0.29912557601928713, | |
| "num_input_tokens_seen": 209283048, | |
| "step": 2055, | |
| "train_runtime": 28276.6448, | |
| "train_tokens_per_second": 7401.269 | |
| }, | |
| { | |
| "epoch": 1.7743324720068907, | |
| "grad_norm": 0.6111214072804273, | |
| "learning_rate": 3.132128767737824e-07, | |
| "loss": 0.2877781391143799, | |
| "num_input_tokens_seen": 209776608, | |
| "step": 2060, | |
| "train_runtime": 28339.5361, | |
| "train_tokens_per_second": 7402.26 | |
| }, | |
| { | |
| "epoch": 1.7786391042204994, | |
| "grad_norm": 0.6104558696682314, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.3083071708679199, | |
| "num_input_tokens_seen": 210290720, | |
| "step": 2065, | |
| "train_runtime": 28413.252, | |
| "train_tokens_per_second": 7401.149 | |
| }, | |
| { | |
| "epoch": 1.7829457364341086, | |
| "grad_norm": 0.5836728591190887, | |
| "learning_rate": 2.9007593072822246e-07, | |
| "loss": 0.30655722618103026, | |
| "num_input_tokens_seen": 210795064, | |
| "step": 2070, | |
| "train_runtime": 28480.1482, | |
| "train_tokens_per_second": 7401.474 | |
| }, | |
| { | |
| "epoch": 1.7872523686477175, | |
| "grad_norm": 0.5843802582004601, | |
| "learning_rate": 2.7883050523424214e-07, | |
| "loss": 0.2855481386184692, | |
| "num_input_tokens_seen": 211307816, | |
| "step": 2075, | |
| "train_runtime": 28550.624, | |
| "train_tokens_per_second": 7401.163 | |
| }, | |
| { | |
| "epoch": 1.7915590008613265, | |
| "grad_norm": 0.5970608151039677, | |
| "learning_rate": 2.67801134211953e-07, | |
| "loss": 0.3143181800842285, | |
| "num_input_tokens_seen": 211819656, | |
| "step": 2080, | |
| "train_runtime": 28622.1835, | |
| "train_tokens_per_second": 7400.541 | |
| }, | |
| { | |
| "epoch": 1.7958656330749354, | |
| "grad_norm": 0.631118629714427, | |
| "learning_rate": 2.569883223975078e-07, | |
| "loss": 0.3136578559875488, | |
| "num_input_tokens_seen": 212342848, | |
| "step": 2085, | |
| "train_runtime": 28690.2587, | |
| "train_tokens_per_second": 7401.218 | |
| }, | |
| { | |
| "epoch": 1.8001722652885443, | |
| "grad_norm": 0.6050935590065106, | |
| "learning_rate": 2.463925646166793e-07, | |
| "loss": 0.3073784351348877, | |
| "num_input_tokens_seen": 212865944, | |
| "step": 2090, | |
| "train_runtime": 28764.8887, | |
| "train_tokens_per_second": 7400.201 | |
| }, | |
| { | |
| "epoch": 1.8044788975021533, | |
| "grad_norm": 0.7462730804752461, | |
| "learning_rate": 2.3601434576221548e-07, | |
| "loss": 0.2910205364227295, | |
| "num_input_tokens_seen": 213312424, | |
| "step": 2095, | |
| "train_runtime": 28826.7975, | |
| "train_tokens_per_second": 7399.796 | |
| }, | |
| { | |
| "epoch": 1.8087855297157622, | |
| "grad_norm": 0.6890875242639481, | |
| "learning_rate": 2.2585414077164914e-07, | |
| "loss": 0.3124124050140381, | |
| "num_input_tokens_seen": 213861568, | |
| "step": 2100, | |
| "train_runtime": 28886.3363, | |
| "train_tokens_per_second": 7403.555 | |
| }, | |
| { | |
| "epoch": 1.8130921619293714, | |
| "grad_norm": 0.6451854669090437, | |
| "learning_rate": 2.1591241460556355e-07, | |
| "loss": 0.3035388469696045, | |
| "num_input_tokens_seen": 214400864, | |
| "step": 2105, | |
| "train_runtime": 28962.1347, | |
| "train_tokens_per_second": 7402.799 | |
| }, | |
| { | |
| "epoch": 1.8173987941429801, | |
| "grad_norm": 0.5932564357300775, | |
| "learning_rate": 2.0618962222631434e-07, | |
| "loss": 0.30562429428100585, | |
| "num_input_tokens_seen": 214933176, | |
| "step": 2110, | |
| "train_runtime": 29019.6315, | |
| "train_tokens_per_second": 7406.475 | |
| }, | |
| { | |
| "epoch": 1.8217054263565893, | |
| "grad_norm": 0.7273993480230436, | |
| "learning_rate": 1.9668620857720865e-07, | |
| "loss": 0.3033386945724487, | |
| "num_input_tokens_seen": 215387240, | |
| "step": 2115, | |
| "train_runtime": 29081.5024, | |
| "train_tokens_per_second": 7406.331 | |
| }, | |
| { | |
| "epoch": 1.826012058570198, | |
| "grad_norm": 0.5794672931344478, | |
| "learning_rate": 1.8740260856214388e-07, | |
| "loss": 0.30868115425109866, | |
| "num_input_tokens_seen": 215936824, | |
| "step": 2120, | |
| "train_runtime": 29150.3054, | |
| "train_tokens_per_second": 7407.704 | |
| }, | |
| { | |
| "epoch": 1.8303186907838072, | |
| "grad_norm": 0.644533623475115, | |
| "learning_rate": 1.7833924702570725e-07, | |
| "loss": 0.30875744819641116, | |
| "num_input_tokens_seen": 216480984, | |
| "step": 2125, | |
| "train_runtime": 29226.4831, | |
| "train_tokens_per_second": 7407.015 | |
| }, | |
| { | |
| "epoch": 1.8346253229974159, | |
| "grad_norm": 0.6156843271663307, | |
| "learning_rate": 1.6949653873372828e-07, | |
| "loss": 0.2976231098175049, | |
| "num_input_tokens_seen": 216985056, | |
| "step": 2130, | |
| "train_runtime": 29303.3245, | |
| "train_tokens_per_second": 7404.793 | |
| }, | |
| { | |
| "epoch": 1.838931955211025, | |
| "grad_norm": 0.5929671577727561, | |
| "learning_rate": 1.6087488835430208e-07, | |
| "loss": 0.3137772560119629, | |
| "num_input_tokens_seen": 217567072, | |
| "step": 2135, | |
| "train_runtime": 29380.4773, | |
| "train_tokens_per_second": 7405.158 | |
| }, | |
| { | |
| "epoch": 1.843238587424634, | |
| "grad_norm": 0.6832951069959544, | |
| "learning_rate": 1.5247469043927153e-07, | |
| "loss": 0.304307746887207, | |
| "num_input_tokens_seen": 218066976, | |
| "step": 2140, | |
| "train_runtime": 29445.7965, | |
| "train_tokens_per_second": 7405.708 | |
| }, | |
| { | |
| "epoch": 1.847545219638243, | |
| "grad_norm": 0.6130447989309619, | |
| "learning_rate": 1.4429632940616721e-07, | |
| "loss": 0.31688547134399414, | |
| "num_input_tokens_seen": 218547776, | |
| "step": 2145, | |
| "train_runtime": 29519.0458, | |
| "train_tokens_per_second": 7403.619 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 0.5920284879807376, | |
| "learning_rate": 1.3634017952061872e-07, | |
| "loss": 0.2871793270111084, | |
| "num_input_tokens_seen": 219097408, | |
| "step": 2150, | |
| "train_runtime": 29593.4565, | |
| "train_tokens_per_second": 7403.576 | |
| }, | |
| { | |
| "epoch": 1.8561584840654608, | |
| "grad_norm": 0.5989232424009299, | |
| "learning_rate": 1.2860660487922616e-07, | |
| "loss": 0.3016232967376709, | |
| "num_input_tokens_seen": 219617128, | |
| "step": 2155, | |
| "train_runtime": 29659.0858, | |
| "train_tokens_per_second": 7404.717 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.6274792319418983, | |
| "learning_rate": 1.2109595939289676e-07, | |
| "loss": 0.3069549560546875, | |
| "num_input_tokens_seen": 220118992, | |
| "step": 2160, | |
| "train_runtime": 29729.4276, | |
| "train_tokens_per_second": 7404.078 | |
| }, | |
| { | |
| "epoch": 1.8647717484926787, | |
| "grad_norm": 0.639624885028358, | |
| "learning_rate": 1.1380858677065177e-07, | |
| "loss": 0.29118402004241944, | |
| "num_input_tokens_seen": 220620384, | |
| "step": 2165, | |
| "train_runtime": 29800.9726, | |
| "train_tokens_per_second": 7403.127 | |
| }, | |
| { | |
| "epoch": 1.8690783807062878, | |
| "grad_norm": 0.6263759770644098, | |
| "learning_rate": 1.0674482050389457e-07, | |
| "loss": 0.3211177349090576, | |
| "num_input_tokens_seen": 221104624, | |
| "step": 2170, | |
| "train_runtime": 29867.1222, | |
| "train_tokens_per_second": 7402.944 | |
| }, | |
| { | |
| "epoch": 1.8733850129198966, | |
| "grad_norm": 0.5850849374408646, | |
| "learning_rate": 9.990498385115066e-08, | |
| "loss": 0.2935049533843994, | |
| "num_input_tokens_seen": 221664440, | |
| "step": 2175, | |
| "train_runtime": 29933.121, | |
| "train_tokens_per_second": 7405.323 | |
| }, | |
| { | |
| "epoch": 1.8776916451335057, | |
| "grad_norm": 0.6187947000777021, | |
| "learning_rate": 9.328938982327396e-08, | |
| "loss": 0.2967803478240967, | |
| "num_input_tokens_seen": 222166280, | |
| "step": 2180, | |
| "train_runtime": 29994.8442, | |
| "train_tokens_per_second": 7406.816 | |
| }, | |
| { | |
| "epoch": 1.8819982773471144, | |
| "grad_norm": 0.6117271333511343, | |
| "learning_rate": 8.68983411691221e-08, | |
| "loss": 0.3003566265106201, | |
| "num_input_tokens_seen": 222663120, | |
| "step": 2185, | |
| "train_runtime": 30054.1743, | |
| "train_tokens_per_second": 7408.725 | |
| }, | |
| { | |
| "epoch": 1.8863049095607236, | |
| "grad_norm": 0.6274877192256639, | |
| "learning_rate": 8.073213036170258e-08, | |
| "loss": 0.30220422744750974, | |
| "num_input_tokens_seen": 223174608, | |
| "step": 2190, | |
| "train_runtime": 30117.6604, | |
| "train_tokens_per_second": 7410.091 | |
| }, | |
| { | |
| "epoch": 1.8906115417743323, | |
| "grad_norm": 0.5663763479699041, | |
| "learning_rate": 7.479103958478783e-08, | |
| "loss": 0.3004784107208252, | |
| "num_input_tokens_seen": 223724416, | |
| "step": 2195, | |
| "train_runtime": 30178.196, | |
| "train_tokens_per_second": 7413.446 | |
| }, | |
| { | |
| "epoch": 1.8949181739879415, | |
| "grad_norm": 0.6615913855044907, | |
| "learning_rate": 6.907534072000177e-08, | |
| "loss": 0.30257329940795896, | |
| "num_input_tokens_seen": 224257552, | |
| "step": 2200, | |
| "train_runtime": 30257.9783, | |
| "train_tokens_per_second": 7411.518 | |
| }, | |
| { | |
| "epoch": 1.8992248062015504, | |
| "grad_norm": 0.554939875230265, | |
| "learning_rate": 6.358529533437796e-08, | |
| "loss": 0.2995903491973877, | |
| "num_input_tokens_seen": 224783568, | |
| "step": 2205, | |
| "train_runtime": 30331.9171, | |
| "train_tokens_per_second": 7410.793 | |
| }, | |
| { | |
| "epoch": 1.9035314384151594, | |
| "grad_norm": 0.6083628550269634, | |
| "learning_rate": 5.832115466838872e-08, | |
| "loss": 0.29779205322265623, | |
| "num_input_tokens_seen": 225281672, | |
| "step": 2210, | |
| "train_runtime": 30407.547, | |
| "train_tokens_per_second": 7408.742 | |
| }, | |
| { | |
| "epoch": 1.9078380706287683, | |
| "grad_norm": 0.6279252265728321, | |
| "learning_rate": 5.3283159624448745e-08, | |
| "loss": 0.3170241117477417, | |
| "num_input_tokens_seen": 225814656, | |
| "step": 2215, | |
| "train_runtime": 30466.2688, | |
| "train_tokens_per_second": 7411.956 | |
| }, | |
| { | |
| "epoch": 1.9121447028423773, | |
| "grad_norm": 0.7185275451441703, | |
| "learning_rate": 4.847154075589e-08, | |
| "loss": 0.3137502670288086, | |
| "num_input_tokens_seen": 226330440, | |
| "step": 2220, | |
| "train_runtime": 30533.9136, | |
| "train_tokens_per_second": 7412.428 | |
| }, | |
| { | |
| "epoch": 1.9164513350559862, | |
| "grad_norm": 0.5936044355682394, | |
| "learning_rate": 4.3886518256411325e-08, | |
| "loss": 0.3071382999420166, | |
| "num_input_tokens_seen": 226837032, | |
| "step": 2225, | |
| "train_runtime": 30605.5094, | |
| "train_tokens_per_second": 7411.64 | |
| }, | |
| { | |
| "epoch": 1.9207579672695951, | |
| "grad_norm": 0.6318773911159581, | |
| "learning_rate": 3.9528301950000345e-08, | |
| "loss": 0.30357215404510496, | |
| "num_input_tokens_seen": 227345616, | |
| "step": 2230, | |
| "train_runtime": 30681.0349, | |
| "train_tokens_per_second": 7409.972 | |
| }, | |
| { | |
| "epoch": 1.9250645994832043, | |
| "grad_norm": 0.5951318413675115, | |
| "learning_rate": 3.539709128133395e-08, | |
| "loss": 0.3153378963470459, | |
| "num_input_tokens_seen": 227840184, | |
| "step": 2235, | |
| "train_runtime": 30751.0125, | |
| "train_tokens_per_second": 7409.193 | |
| }, | |
| { | |
| "epoch": 1.929371231696813, | |
| "grad_norm": 0.6707938307520876, | |
| "learning_rate": 3.149307530664891e-08, | |
| "loss": 0.30716452598571775, | |
| "num_input_tokens_seen": 228355216, | |
| "step": 2240, | |
| "train_runtime": 30817.8728, | |
| "train_tokens_per_second": 7409.831 | |
| }, | |
| { | |
| "epoch": 1.9336778639104222, | |
| "grad_norm": 0.6127756791716347, | |
| "learning_rate": 2.7816432685091598e-08, | |
| "loss": 0.2936999320983887, | |
| "num_input_tokens_seen": 228882600, | |
| "step": 2245, | |
| "train_runtime": 30882.7718, | |
| "train_tokens_per_second": 7411.336 | |
| }, | |
| { | |
| "epoch": 1.937984496124031, | |
| "grad_norm": 0.551559122734471, | |
| "learning_rate": 2.4367331670539506e-08, | |
| "loss": 0.3067632675170898, | |
| "num_input_tokens_seen": 229400664, | |
| "step": 2250, | |
| "train_runtime": 30954.957, | |
| "train_tokens_per_second": 7410.789 | |
| }, | |
| { | |
| "epoch": 1.94229112833764, | |
| "grad_norm": 0.6350936400123717, | |
| "learning_rate": 2.1145930103904645e-08, | |
| "loss": 0.3079858303070068, | |
| "num_input_tokens_seen": 229912008, | |
| "step": 2255, | |
| "train_runtime": 31022.3371, | |
| "train_tokens_per_second": 7411.176 | |
| }, | |
| { | |
| "epoch": 1.9465977605512488, | |
| "grad_norm": 0.6737908112781935, | |
| "learning_rate": 1.8152375405909305e-08, | |
| "loss": 0.3009981632232666, | |
| "num_input_tokens_seen": 230446552, | |
| "step": 2260, | |
| "train_runtime": 31098.5518, | |
| "train_tokens_per_second": 7410.202 | |
| }, | |
| { | |
| "epoch": 1.950904392764858, | |
| "grad_norm": 0.7144758716926696, | |
| "learning_rate": 1.538680457033814e-08, | |
| "loss": 0.3050046920776367, | |
| "num_input_tokens_seen": 230974000, | |
| "step": 2265, | |
| "train_runtime": 31173.146, | |
| "train_tokens_per_second": 7409.39 | |
| }, | |
| { | |
| "epoch": 1.9552110249784669, | |
| "grad_norm": 0.6702413804814763, | |
| "learning_rate": 1.284934415776984e-08, | |
| "loss": 0.3118910789489746, | |
| "num_input_tokens_seen": 231510712, | |
| "step": 2270, | |
| "train_runtime": 31249.4408, | |
| "train_tokens_per_second": 7408.475 | |
| }, | |
| { | |
| "epoch": 1.9595176571920758, | |
| "grad_norm": 0.6610446821494167, | |
| "learning_rate": 1.0540110289786742e-08, | |
| "loss": 0.28809175491333006, | |
| "num_input_tokens_seen": 231960472, | |
| "step": 2275, | |
| "train_runtime": 31314.9022, | |
| "train_tokens_per_second": 7407.351 | |
| }, | |
| { | |
| "epoch": 1.9638242894056848, | |
| "grad_norm": 0.6292947995618373, | |
| "learning_rate": 8.459208643659122e-09, | |
| "loss": 0.3013895511627197, | |
| "num_input_tokens_seen": 232466016, | |
| "step": 2280, | |
| "train_runtime": 31390.756, | |
| "train_tokens_per_second": 7405.556 | |
| }, | |
| { | |
| "epoch": 1.9681309216192937, | |
| "grad_norm": 0.6388156769135576, | |
| "learning_rate": 6.606734447507923e-09, | |
| "loss": 0.2956396579742432, | |
| "num_input_tokens_seen": 233001544, | |
| "step": 2285, | |
| "train_runtime": 31455.4291, | |
| "train_tokens_per_second": 7407.355 | |
| }, | |
| { | |
| "epoch": 1.9724375538329026, | |
| "grad_norm": 0.7338122921014575, | |
| "learning_rate": 4.982772475951026e-09, | |
| "loss": 0.3004451751708984, | |
| "num_input_tokens_seen": 233486880, | |
| "step": 2290, | |
| "train_runtime": 31524.8943, | |
| "train_tokens_per_second": 7406.429 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 0.6784313338377094, | |
| "learning_rate": 3.587397046219132e-09, | |
| "loss": 0.3195258617401123, | |
| "num_input_tokens_seen": 233992944, | |
| "step": 2295, | |
| "train_runtime": 31592.025, | |
| "train_tokens_per_second": 7406.709 | |
| }, | |
| { | |
| "epoch": 1.9810508182601207, | |
| "grad_norm": 0.5996807721646643, | |
| "learning_rate": 2.4206720147573748e-09, | |
| "loss": 0.3012960433959961, | |
| "num_input_tokens_seen": 234505824, | |
| "step": 2300, | |
| "train_runtime": 31662.022, | |
| "train_tokens_per_second": 7406.533 | |
| }, | |
| { | |
| "epoch": 1.9853574504737295, | |
| "grad_norm": 0.612165146123787, | |
| "learning_rate": 1.4826507743032071e-09, | |
| "loss": 0.2931736707687378, | |
| "num_input_tokens_seen": 234988896, | |
| "step": 2305, | |
| "train_runtime": 31727.6848, | |
| "train_tokens_per_second": 7406.431 | |
| }, | |
| { | |
| "epoch": 1.9896640826873386, | |
| "grad_norm": 0.5687329803521689, | |
| "learning_rate": 7.733762514411381e-10, | |
| "loss": 0.3052358865737915, | |
| "num_input_tokens_seen": 235490216, | |
| "step": 2310, | |
| "train_runtime": 31802.994, | |
| "train_tokens_per_second": 7404.656 | |
| }, | |
| { | |
| "epoch": 1.9939707149009473, | |
| "grad_norm": 0.6676211550092361, | |
| "learning_rate": 2.928809046398584e-10, | |
| "loss": 0.29867162704467776, | |
| "num_input_tokens_seen": 236010080, | |
| "step": 2315, | |
| "train_runtime": 31879.2306, | |
| "train_tokens_per_second": 7403.255 | |
| }, | |
| { | |
| "epoch": 1.9982773471145565, | |
| "grad_norm": 0.5980690602806559, | |
| "learning_rate": 4.118672276620661e-11, | |
| "loss": 0.3062272548675537, | |
| "num_input_tokens_seen": 236545192, | |
| "step": 2320, | |
| "train_runtime": 31958.4063, | |
| "train_tokens_per_second": 7401.658 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "num_input_tokens_seen": 236749552, | |
| "step": 2322, | |
| "total_flos": 437831363461120.0, | |
| "train_loss": 0.3453142216989031, | |
| "train_runtime": 32003.0179, | |
| "train_samples_per_second": 0.58, | |
| "train_steps_per_second": 0.073 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2322, | |
| "num_input_tokens_seen": 236749552, | |
| "num_train_epochs": 2, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 437831363461120.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |